Home | Info | Community | Development | myReactOS | Contact Us
ReactOS Development > Doxygen > uri.c
Go to the documentation of this file.
00001 /* 00002 * Copyright 2010 Jacek Caban for CodeWeavers 00003 * Copyright 2010 Thomas Mullaly 00004 * 00005 * This library is free software; you can redistribute it and/or 00006 * modify it under the terms of the GNU Lesser General Public 00007 * License as published by the Free Software Foundation; either 00008 * version 2.1 of the License, or (at your option) any later version. 00009 * 00010 * This library is distributed in the hope that it will be useful, 00011 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00012 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00013 * Lesser General Public License for more details. 00014 * 00015 * You should have received a copy of the GNU Lesser General Public 00016 * License along with this library; if not, write to the Free Software 00017 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA 00018 */ 00019 00020 #include "urlmon_main.h" 00021 #include "wine/debug.h" 00022 00023 #define NO_SHLWAPI_REG 00024 #include "shlwapi.h" 00025 00026 #include "strsafe.h" 00027 00028 #define UINT_MAX 0xffffffff 00029 #define USHORT_MAX 0xffff 00030 00031 #define URI_DISPLAY_NO_ABSOLUTE_URI 0x1 00032 #define URI_DISPLAY_NO_DEFAULT_PORT_AUTH 0x2 00033 00034 #define ALLOW_NULL_TERM_SCHEME 0x01 00035 #define ALLOW_NULL_TERM_USER_NAME 0x02 00036 #define ALLOW_NULL_TERM_PASSWORD 0x04 00037 #define ALLOW_BRACKETLESS_IP_LITERAL 0x08 00038 #define SKIP_IP_FUTURE_CHECK 0x10 00039 #define IGNORE_PORT_DELIMITER 0x20 00040 00041 #define RAW_URI_FORCE_PORT_DISP 0x1 00042 #define RAW_URI_CONVERT_TO_DOS_PATH 0x2 00043 00044 #define COMBINE_URI_FORCE_FLAG_USE 0x1 00045 00046 WINE_DEFAULT_DEBUG_CHANNEL(urlmon); 00047 00048 static const IID IID_IUriObj = {0x4b364760,0x9f51,0x11df,{0x98,0x1c,0x08,0x00,0x20,0x0c,0x9a,0x66}}; 00049 00050 typedef struct { 00051 IUri IUri_iface; 00052 IUriBuilderFactory IUriBuilderFactory_iface; 00053 00054 LONG ref; 00055 00056 BSTR raw_uri; 00057 00058 /* 
Information about the canonicalized URI's buffer. */ 00059 WCHAR *canon_uri; 00060 DWORD canon_size; 00061 DWORD canon_len; 00062 BOOL display_modifiers; 00063 DWORD create_flags; 00064 00065 INT scheme_start; 00066 DWORD scheme_len; 00067 URL_SCHEME scheme_type; 00068 00069 INT userinfo_start; 00070 DWORD userinfo_len; 00071 INT userinfo_split; 00072 00073 INT host_start; 00074 DWORD host_len; 00075 Uri_HOST_TYPE host_type; 00076 00077 INT port_offset; 00078 DWORD port; 00079 BOOL has_port; 00080 00081 INT authority_start; 00082 DWORD authority_len; 00083 00084 INT domain_offset; 00085 00086 INT path_start; 00087 DWORD path_len; 00088 INT extension_offset; 00089 00090 INT query_start; 00091 DWORD query_len; 00092 00093 INT fragment_start; 00094 DWORD fragment_len; 00095 } Uri; 00096 00097 typedef struct { 00098 IUriBuilder IUriBuilder_iface; 00099 LONG ref; 00100 00101 Uri *uri; 00102 DWORD modified_props; 00103 00104 WCHAR *fragment; 00105 DWORD fragment_len; 00106 00107 WCHAR *host; 00108 DWORD host_len; 00109 00110 WCHAR *password; 00111 DWORD password_len; 00112 00113 WCHAR *path; 00114 DWORD path_len; 00115 00116 BOOL has_port; 00117 DWORD port; 00118 00119 WCHAR *query; 00120 DWORD query_len; 00121 00122 WCHAR *scheme; 00123 DWORD scheme_len; 00124 00125 WCHAR *username; 00126 DWORD username_len; 00127 } UriBuilder; 00128 00129 typedef struct { 00130 const WCHAR *str; 00131 DWORD len; 00132 } h16; 00133 00134 typedef struct { 00135 /* IPv6 addresses can hold up to 8 h16 components. */ 00136 h16 components[8]; 00137 DWORD h16_count; 00138 00139 /* An IPv6 can have 1 elision ("::"). */ 00140 const WCHAR *elision; 00141 00142 /* An IPv6 can contain 1 IPv4 address as the last 32bits of the address. 
*/ 00143 const WCHAR *ipv4; 00144 DWORD ipv4_len; 00145 00146 INT components_size; 00147 INT elision_size; 00148 } ipv6_address; 00149 00150 typedef struct { 00151 BSTR uri; 00152 00153 BOOL is_relative; 00154 BOOL is_opaque; 00155 BOOL has_implicit_scheme; 00156 BOOL has_implicit_ip; 00157 UINT implicit_ipv4; 00158 00159 const WCHAR *scheme; 00160 DWORD scheme_len; 00161 URL_SCHEME scheme_type; 00162 00163 const WCHAR *username; 00164 DWORD username_len; 00165 00166 const WCHAR *password; 00167 DWORD password_len; 00168 00169 const WCHAR *host; 00170 DWORD host_len; 00171 Uri_HOST_TYPE host_type; 00172 00173 BOOL has_ipv6; 00174 ipv6_address ipv6_address; 00175 00176 BOOL has_port; 00177 const WCHAR *port; 00178 DWORD port_len; 00179 DWORD port_value; 00180 00181 const WCHAR *path; 00182 DWORD path_len; 00183 00184 const WCHAR *query; 00185 DWORD query_len; 00186 00187 const WCHAR *fragment; 00188 DWORD fragment_len; 00189 } parse_data; 00190 00191 static const CHAR hexDigits[] = "0123456789ABCDEF"; 00192 00193 /* List of scheme types/scheme names that are recognized by the IUri interface as of IE 7. 
*/ 00194 static const struct { 00195 URL_SCHEME scheme; 00196 WCHAR scheme_name[16]; 00197 } recognized_schemes[] = { 00198 {URL_SCHEME_FTP, {'f','t','p',0}}, 00199 {URL_SCHEME_HTTP, {'h','t','t','p',0}}, 00200 {URL_SCHEME_GOPHER, {'g','o','p','h','e','r',0}}, 00201 {URL_SCHEME_MAILTO, {'m','a','i','l','t','o',0}}, 00202 {URL_SCHEME_NEWS, {'n','e','w','s',0}}, 00203 {URL_SCHEME_NNTP, {'n','n','t','p',0}}, 00204 {URL_SCHEME_TELNET, {'t','e','l','n','e','t',0}}, 00205 {URL_SCHEME_WAIS, {'w','a','i','s',0}}, 00206 {URL_SCHEME_FILE, {'f','i','l','e',0}}, 00207 {URL_SCHEME_MK, {'m','k',0}}, 00208 {URL_SCHEME_HTTPS, {'h','t','t','p','s',0}}, 00209 {URL_SCHEME_SHELL, {'s','h','e','l','l',0}}, 00210 {URL_SCHEME_SNEWS, {'s','n','e','w','s',0}}, 00211 {URL_SCHEME_LOCAL, {'l','o','c','a','l',0}}, 00212 {URL_SCHEME_JAVASCRIPT, {'j','a','v','a','s','c','r','i','p','t',0}}, 00213 {URL_SCHEME_VBSCRIPT, {'v','b','s','c','r','i','p','t',0}}, 00214 {URL_SCHEME_ABOUT, {'a','b','o','u','t',0}}, 00215 {URL_SCHEME_RES, {'r','e','s',0}}, 00216 {URL_SCHEME_MSSHELLROOTED, {'m','s','-','s','h','e','l','l','-','r','o','o','t','e','d',0}}, 00217 {URL_SCHEME_MSSHELLIDLIST, {'m','s','-','s','h','e','l','l','-','i','d','l','i','s','t',0}}, 00218 {URL_SCHEME_MSHELP, {'h','c','p',0}}, 00219 {URL_SCHEME_WILDCARD, {'*',0}} 00220 }; 00221 00222 /* List of default ports Windows recognizes. */ 00223 static const struct { 00224 URL_SCHEME scheme; 00225 USHORT port; 00226 } default_ports[] = { 00227 {URL_SCHEME_FTP, 21}, 00228 {URL_SCHEME_HTTP, 80}, 00229 {URL_SCHEME_GOPHER, 70}, 00230 {URL_SCHEME_NNTP, 119}, 00231 {URL_SCHEME_TELNET, 23}, 00232 {URL_SCHEME_WAIS, 210}, 00233 {URL_SCHEME_HTTPS, 443}, 00234 }; 00235 00236 /* List of 3 character top level domain names Windows seems to recognize. 00237 * There might be more, but, these are the only ones I've found so far. 
00238 */ 00239 static const struct { 00240 WCHAR tld_name[4]; 00241 } recognized_tlds[] = { 00242 {{'c','o','m',0}}, 00243 {{'e','d','u',0}}, 00244 {{'g','o','v',0}}, 00245 {{'i','n','t',0}}, 00246 {{'m','i','l',0}}, 00247 {{'n','e','t',0}}, 00248 {{'o','r','g',0}} 00249 }; 00250 00251 static Uri *get_uri_obj(IUri *uri) 00252 { 00253 Uri *ret; 00254 HRESULT hres; 00255 00256 hres = IUri_QueryInterface(uri, &IID_IUriObj, (void**)&ret); 00257 return SUCCEEDED(hres) ? ret : NULL; 00258 } 00259 00260 static inline BOOL is_alpha(WCHAR val) { 00261 return ((val >= 'a' && val <= 'z') || (val >= 'A' && val <= 'Z')); 00262 } 00263 00264 static inline BOOL is_num(WCHAR val) { 00265 return (val >= '0' && val <= '9'); 00266 } 00267 00268 static inline BOOL is_drive_path(const WCHAR *str) { 00269 return (is_alpha(str[0]) && (str[1] == ':' || str[1] == '|')); 00270 } 00271 00272 static inline BOOL is_unc_path(const WCHAR *str) { 00273 return (str[0] == '\\' && str[0] == '\\'); 00274 } 00275 00276 static inline BOOL is_forbidden_dos_path_char(WCHAR val) { 00277 return (val == '>' || val == '<' || val == '\"'); 00278 } 00279 00280 /* A URI is implicitly a file path if it begins with 00281 * a drive letter (eg X:) or starts with "\\" (UNC path). 00282 */ 00283 static inline BOOL is_implicit_file_path(const WCHAR *str) { 00284 return (is_unc_path(str) || (is_alpha(str[0]) && str[1] == ':')); 00285 } 00286 00287 /* Checks if the URI is a hierarchical URI. A hierarchical 00288 * URI is one that has "//" after the scheme. 00289 */ 00290 static BOOL check_hierarchical(const WCHAR **ptr) { 00291 const WCHAR *start = *ptr; 00292 00293 if(**ptr != '/') 00294 return FALSE; 00295 00296 ++(*ptr); 00297 if(**ptr != '/') { 00298 *ptr = start; 00299 return FALSE; 00300 } 00301 00302 ++(*ptr); 00303 return TRUE; 00304 } 00305 00306 /* unreserved = ALPHA / DIGIT / "-" / "." 
/ "_" / "~" */ 00307 static inline BOOL is_unreserved(WCHAR val) { 00308 return (is_alpha(val) || is_num(val) || val == '-' || val == '.' || 00309 val == '_' || val == '~'); 00310 } 00311 00312 /* sub-delims = "!" / "$" / "&" / "'" / "(" / ")" 00313 * / "*" / "+" / "," / ";" / "=" 00314 */ 00315 static inline BOOL is_subdelim(WCHAR val) { 00316 return (val == '!' || val == '$' || val == '&' || 00317 val == '\'' || val == '(' || val == ')' || 00318 val == '*' || val == '+' || val == ',' || 00319 val == ';' || val == '='); 00320 } 00321 00322 /* gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" */ 00323 static inline BOOL is_gendelim(WCHAR val) { 00324 return (val == ':' || val == '/' || val == '?' || 00325 val == '#' || val == '[' || val == ']' || 00326 val == '@'); 00327 } 00328 00329 /* Characters that delimit the end of the authority 00330 * section of a URI. Sometimes a '\\' is considered 00331 * an authority delimeter. 00332 */ 00333 static inline BOOL is_auth_delim(WCHAR val, BOOL acceptSlash) { 00334 return (val == '#' || val == '/' || val == '?' 
|| 00335 val == '\0' || (acceptSlash && val == '\\')); 00336 } 00337 00338 /* reserved = gen-delims / sub-delims */ 00339 static inline BOOL is_reserved(WCHAR val) { 00340 return (is_subdelim(val) || is_gendelim(val)); 00341 } 00342 00343 static inline BOOL is_hexdigit(WCHAR val) { 00344 return ((val >= 'a' && val <= 'f') || 00345 (val >= 'A' && val <= 'F') || 00346 (val >= '0' && val <= '9')); 00347 } 00348 00349 static inline BOOL is_path_delim(WCHAR val) { 00350 return (!val || val == '#' || val == '?'); 00351 } 00352 00353 static inline BOOL is_slash(WCHAR c) 00354 { 00355 return c == '/' || c == '\\'; 00356 } 00357 00358 static BOOL is_default_port(URL_SCHEME scheme, DWORD port) { 00359 DWORD i; 00360 00361 for(i = 0; i < sizeof(default_ports)/sizeof(default_ports[0]); ++i) { 00362 if(default_ports[i].scheme == scheme && default_ports[i].port) 00363 return TRUE; 00364 } 00365 00366 return FALSE; 00367 } 00368 00369 /* List of schemes types Windows seems to expect to be hierarchical. */ 00370 static inline BOOL is_hierarchical_scheme(URL_SCHEME type) { 00371 return(type == URL_SCHEME_HTTP || type == URL_SCHEME_FTP || 00372 type == URL_SCHEME_GOPHER || type == URL_SCHEME_NNTP || 00373 type == URL_SCHEME_TELNET || type == URL_SCHEME_WAIS || 00374 type == URL_SCHEME_FILE || type == URL_SCHEME_HTTPS || 00375 type == URL_SCHEME_RES); 00376 } 00377 00378 /* Checks if 'flags' contains an invalid combination of Uri_CREATE flags. 
*/ 00379 static inline BOOL has_invalid_flag_combination(DWORD flags) { 00380 return((flags & Uri_CREATE_DECODE_EXTRA_INFO && flags & Uri_CREATE_NO_DECODE_EXTRA_INFO) || 00381 (flags & Uri_CREATE_CANONICALIZE && flags & Uri_CREATE_NO_CANONICALIZE) || 00382 (flags & Uri_CREATE_CRACK_UNKNOWN_SCHEMES && flags & Uri_CREATE_NO_CRACK_UNKNOWN_SCHEMES) || 00383 (flags & Uri_CREATE_PRE_PROCESS_HTML_URI && flags & Uri_CREATE_NO_PRE_PROCESS_HTML_URI) || 00384 (flags & Uri_CREATE_IE_SETTINGS && flags & Uri_CREATE_NO_IE_SETTINGS)); 00385 } 00386 00387 /* Applies each default Uri_CREATE flags to 'flags' if it 00388 * doesn't cause a flag conflict. 00389 */ 00390 static void apply_default_flags(DWORD *flags) { 00391 if(!(*flags & Uri_CREATE_NO_CANONICALIZE)) 00392 *flags |= Uri_CREATE_CANONICALIZE; 00393 if(!(*flags & Uri_CREATE_NO_DECODE_EXTRA_INFO)) 00394 *flags |= Uri_CREATE_DECODE_EXTRA_INFO; 00395 if(!(*flags & Uri_CREATE_NO_CRACK_UNKNOWN_SCHEMES)) 00396 *flags |= Uri_CREATE_CRACK_UNKNOWN_SCHEMES; 00397 if(!(*flags & Uri_CREATE_NO_PRE_PROCESS_HTML_URI)) 00398 *flags |= Uri_CREATE_PRE_PROCESS_HTML_URI; 00399 if(!(*flags & Uri_CREATE_IE_SETTINGS)) 00400 *flags |= Uri_CREATE_NO_IE_SETTINGS; 00401 } 00402 00403 /* Determines if the URI is hierarchical using the information already parsed into 00404 * data and using the current location of parsing in the URI string. 00405 * 00406 * Windows considers a URI hierarchical if on of the following is true: 00407 * A.) It's a wildcard scheme. 00408 * B.) It's an implicit file scheme. 00409 * C.) It's a known hierarchical scheme and it has two '\\' after the scheme name. 00410 * (the '\\' will be converted into "//" during canonicalization). 00411 * D.) It's not a relative URI and "//" appears after the scheme name. 
00412 */ 00413 static inline BOOL is_hierarchical_uri(const WCHAR **ptr, const parse_data *data) { 00414 const WCHAR *start = *ptr; 00415 00416 if(data->scheme_type == URL_SCHEME_WILDCARD) 00417 return TRUE; 00418 else if(data->scheme_type == URL_SCHEME_FILE && data->has_implicit_scheme) 00419 return TRUE; 00420 else if(is_hierarchical_scheme(data->scheme_type) && (*ptr)[0] == '\\' && (*ptr)[1] == '\\') { 00421 *ptr += 2; 00422 return TRUE; 00423 } else if(!data->is_relative && check_hierarchical(ptr)) 00424 return TRUE; 00425 00426 *ptr = start; 00427 return FALSE; 00428 } 00429 00430 /* Checks if the two Uri's are logically equivalent. It's a simple 00431 * comparison, since they are both of type Uri, and it can access 00432 * the properties of each Uri directly without the need to go 00433 * through the "IUri_Get*" interface calls. 00434 */ 00435 static BOOL are_equal_simple(const Uri *a, const Uri *b) { 00436 if(a->scheme_type == b->scheme_type) { 00437 const BOOL known_scheme = a->scheme_type != URL_SCHEME_UNKNOWN; 00438 const BOOL are_hierarchical = 00439 (a->authority_start > -1 && b->authority_start > -1); 00440 00441 if(a->scheme_type == URL_SCHEME_FILE) { 00442 if(a->canon_len == b->canon_len) 00443 return !StrCmpIW(a->canon_uri, b->canon_uri); 00444 } 00445 00446 /* Only compare the scheme names (if any) if their unknown scheme types. */ 00447 if(!known_scheme) { 00448 if((a->scheme_start > -1 && b->scheme_start > -1) && 00449 (a->scheme_len == b->scheme_len)) { 00450 /* Make sure the schemes are the same. */ 00451 if(StrCmpNW(a->canon_uri+a->scheme_start, b->canon_uri+b->scheme_start, a->scheme_len)) 00452 return FALSE; 00453 } else if(a->scheme_len != b->scheme_len) 00454 /* One of the Uri's has a scheme name, while the other doesn't. */ 00455 return FALSE; 00456 } 00457 00458 /* If they have a userinfo component, perform case sensitive compare. 
*/ 00459 if((a->userinfo_start > -1 && b->userinfo_start > -1) && 00460 (a->userinfo_len == b->userinfo_len)) { 00461 if(StrCmpNW(a->canon_uri+a->userinfo_start, b->canon_uri+b->userinfo_start, a->userinfo_len)) 00462 return FALSE; 00463 } else if(a->userinfo_len != b->userinfo_len) 00464 /* One of the Uri's had a userinfo, while the other one doesn't. */ 00465 return FALSE; 00466 00467 /* Check if they have a host name. */ 00468 if((a->host_start > -1 && b->host_start > -1) && 00469 (a->host_len == b->host_len)) { 00470 /* Perform a case insensitive compare if they are a known scheme type. */ 00471 if(known_scheme) { 00472 if(StrCmpNIW(a->canon_uri+a->host_start, b->canon_uri+b->host_start, a->host_len)) 00473 return FALSE; 00474 } else if(StrCmpNW(a->canon_uri+a->host_start, b->canon_uri+b->host_start, a->host_len)) 00475 return FALSE; 00476 } else if(a->host_len != b->host_len) 00477 /* One of the Uri's had a host, while the other one didn't. */ 00478 return FALSE; 00479 00480 if(a->has_port && b->has_port) { 00481 if(a->port != b->port) 00482 return FALSE; 00483 } else if(a->has_port || b->has_port) 00484 /* One had a port, while the other one didn't. */ 00485 return FALSE; 00486 00487 /* Windows is weird with how it handles paths. For example 00488 * One URI could be "http://google.com" (after canonicalization) 00489 * and one could be "http://google.com/" and the IsEqual function 00490 * would still evaluate to TRUE, but, only if they are both hierarchical 00491 * URIs. 
00492 */ 00493 if((a->path_start > -1 && b->path_start > -1) && 00494 (a->path_len == b->path_len)) { 00495 if(StrCmpNW(a->canon_uri+a->path_start, b->canon_uri+b->path_start, a->path_len)) 00496 return FALSE; 00497 } else if(are_hierarchical && a->path_len == -1 && b->path_len == 0) { 00498 if(*(a->canon_uri+a->path_start) != '/') 00499 return FALSE; 00500 } else if(are_hierarchical && b->path_len == 1 && a->path_len == 0) { 00501 if(*(b->canon_uri+b->path_start) != '/') 00502 return FALSE; 00503 } else if(a->path_len != b->path_len) 00504 return FALSE; 00505 00506 /* Compare the query strings of the two URIs. */ 00507 if((a->query_start > -1 && b->query_start > -1) && 00508 (a->query_len == b->query_len)) { 00509 if(StrCmpNW(a->canon_uri+a->query_start, b->canon_uri+b->query_start, a->query_len)) 00510 return FALSE; 00511 } else if(a->query_len != b->query_len) 00512 return FALSE; 00513 00514 if((a->fragment_start > -1 && b->fragment_start > -1) && 00515 (a->fragment_len == b->fragment_len)) { 00516 if(StrCmpNW(a->canon_uri+a->fragment_start, b->canon_uri+b->fragment_start, a->fragment_len)) 00517 return FALSE; 00518 } else if(a->fragment_len != b->fragment_len) 00519 return FALSE; 00520 00521 /* If we get here, the two URIs are equivalent. */ 00522 return TRUE; 00523 } 00524 00525 return FALSE; 00526 } 00527 00528 /* Computes the size of the given IPv6 address. 00529 * Each h16 component is 16bits, if there is an IPv4 address, it's 00530 * 32bits. If there's an elision it can be 16bits to 128bits, depending 00531 * on the number of other components. 00532 * 00533 * Modeled after google-url's CheckIPv6ComponentsSize function 00534 */ 00535 static void compute_ipv6_comps_size(ipv6_address *address) { 00536 address->components_size = address->h16_count * 2; 00537 00538 if(address->ipv4) 00539 /* IPv4 address is 4 bytes. */ 00540 address->components_size += 4; 00541 00542 if(address->elision) { 00543 /* An elision can be anywhere from 2 bytes up to 16 bytes. 
00544 * It size depends on the size of the h16 and IPv4 components. 00545 */ 00546 address->elision_size = 16 - address->components_size; 00547 if(address->elision_size < 2) 00548 address->elision_size = 2; 00549 } else 00550 address->elision_size = 0; 00551 } 00552 00553 /* Taken from dlls/jscript/lex.c */ 00554 static int hex_to_int(WCHAR val) { 00555 if(val >= '0' && val <= '9') 00556 return val - '0'; 00557 else if(val >= 'a' && val <= 'f') 00558 return val - 'a' + 10; 00559 else if(val >= 'A' && val <= 'F') 00560 return val - 'A' + 10; 00561 00562 return -1; 00563 } 00564 00565 /* Helper function for converting a percent encoded string 00566 * representation of a WCHAR value into its actual WCHAR value. If 00567 * the two characters following the '%' aren't valid hex values then 00568 * this function returns the NULL character. 00569 * 00570 * Eg. 00571 * "%2E" will result in '.' being returned by this function. 00572 */ 00573 static WCHAR decode_pct_val(const WCHAR *ptr) { 00574 WCHAR ret = '\0'; 00575 00576 if(*ptr == '%' && is_hexdigit(*(ptr + 1)) && is_hexdigit(*(ptr + 2))) { 00577 INT a = hex_to_int(*(ptr + 1)); 00578 INT b = hex_to_int(*(ptr + 2)); 00579 00580 ret = a << 4; 00581 ret += b; 00582 } 00583 00584 return ret; 00585 } 00586 00587 /* Helper function for percent encoding a given character 00588 * and storing the encoded value into a given buffer (dest). 00589 * 00590 * It's up to the calling function to ensure that there is 00591 * at least enough space in 'dest' for the percent encoded 00592 * value to be stored (so dest + 3 spaces available). 00593 */ 00594 static inline void pct_encode_val(WCHAR val, WCHAR *dest) { 00595 dest[0] = '%'; 00596 dest[1] = hexDigits[(val >> 4) & 0xf]; 00597 dest[2] = hexDigits[val & 0xf]; 00598 } 00599 00600 /* Attempts to parse the domain name from the host. 00601 * 00602 * This function also includes the Top-level Domain (TLD) name 00603 * of the host when it tries to find the domain name. 
If it finds 00604 * a valid domain name it will assign 'domain_start' the offset 00605 * into 'host' where the domain name starts. 00606 * 00607 * It's implied that if there is a domain name its range is: 00608 * [host+domain_start, host+host_len). 00609 */ 00610 void find_domain_name(const WCHAR *host, DWORD host_len, 00611 INT *domain_start) { 00612 const WCHAR *last_tld, *sec_last_tld, *end; 00613 00614 end = host+host_len-1; 00615 00616 *domain_start = -1; 00617 00618 /* There has to be at least enough room for a '.' followed by a 00619 * 3 character TLD for a domain to even exist in the host name. 00620 */ 00621 if(host_len < 4) 00622 return; 00623 00624 last_tld = memrchrW(host, '.', host_len); 00625 if(!last_tld) 00626 /* http://hostname -> has no domain name. */ 00627 return; 00628 00629 sec_last_tld = memrchrW(host, '.', last_tld-host); 00630 if(!sec_last_tld) { 00631 /* If the '.' is at the beginning of the host there 00632 * has to be at least 3 characters in the TLD for it 00633 * to be valid. 00634 * Ex: .com -> .com as the domain name. 00635 * .co -> has no domain name. 00636 */ 00637 if(last_tld-host == 0) { 00638 if(end-(last_tld-1) < 3) 00639 return; 00640 } else if(last_tld-host == 3) { 00641 DWORD i; 00642 00643 /* If there's three characters in front of last_tld and 00644 * they are on the list of recognized TLDs, then this 00645 * host doesn't have a domain (since the host only contains 00646 * a TLD name. 00647 * Ex: edu.uk -> has no domain name. 00648 * foo.uk -> foo.uk as the domain name. 00649 */ 00650 for(i = 0; i < sizeof(recognized_tlds)/sizeof(recognized_tlds[0]); ++i) { 00651 if(!StrCmpNIW(host, recognized_tlds[i].tld_name, 3)) 00652 return; 00653 } 00654 } else if(last_tld-host < 3) 00655 /* Anything less than 3 characters is considered part 00656 * of the TLD name. 00657 * Ex: ak.uk -> Has no domain name. 00658 */ 00659 return; 00660 00661 /* Otherwise the domain name is the whole host name. 
*/ 00662 *domain_start = 0; 00663 } else if(end+1-last_tld > 3) { 00664 /* If the last_tld has more than 3 characters, then it's automatically 00665 * considered the TLD of the domain name. 00666 * Ex: www.winehq.org.uk.test -> uk.test as the domain name. 00667 */ 00668 *domain_start = (sec_last_tld+1)-host; 00669 } else if(last_tld - (sec_last_tld+1) < 4) { 00670 DWORD i; 00671 /* If the sec_last_tld is 3 characters long it HAS to be on the list of 00672 * recognized to still be considered part of the TLD name, otherwise 00673 * its considered the domain name. 00674 * Ex: www.google.com.uk -> google.com.uk as the domain name. 00675 * www.google.foo.uk -> foo.uk as the domain name. 00676 */ 00677 if(last_tld - (sec_last_tld+1) == 3) { 00678 for(i = 0; i < sizeof(recognized_tlds)/sizeof(recognized_tlds[0]); ++i) { 00679 if(!StrCmpNIW(sec_last_tld+1, recognized_tlds[i].tld_name, 3)) { 00680 const WCHAR *domain = memrchrW(host, '.', sec_last_tld-host); 00681 00682 if(!domain) 00683 *domain_start = 0; 00684 else 00685 *domain_start = (domain+1) - host; 00686 TRACE("Found domain name %s\n", debugstr_wn(host+*domain_start, 00687 (host+host_len)-(host+*domain_start))); 00688 return; 00689 } 00690 } 00691 00692 *domain_start = (sec_last_tld+1)-host; 00693 } else { 00694 /* Since the sec_last_tld is less than 3 characters it's considered 00695 * part of the TLD. 00696 * Ex: www.google.fo.uk -> google.fo.uk as the domain name. 00697 */ 00698 const WCHAR *domain = memrchrW(host, '.', sec_last_tld-host); 00699 00700 if(!domain) 00701 *domain_start = 0; 00702 else 00703 *domain_start = (domain+1) - host; 00704 } 00705 } else { 00706 /* The second to last TLD has more than 3 characters making it 00707 * the domain name. 00708 * Ex: www.google.test.us -> test.us as the domain name. 
00709 */ 00710 *domain_start = (sec_last_tld+1)-host; 00711 } 00712 00713 TRACE("Found domain name %s\n", debugstr_wn(host+*domain_start, 00714 (host+host_len)-(host+*domain_start))); 00715 } 00716 00717 /* Removes the dot segments from a hierarchical URIs path component. This 00718 * function performs the removal in place. 00719 * 00720 * This function returns the new length of the path string. 00721 */ 00722 static DWORD remove_dot_segments(WCHAR *path, DWORD path_len) { 00723 WCHAR *out = path; 00724 const WCHAR *in = out; 00725 const WCHAR *end = out + path_len; 00726 DWORD len; 00727 00728 while(in < end) { 00729 /* Move the first path segment in the input buffer to the end of 00730 * the output buffer, and any subsequent characters up to, including 00731 * the next "/" character (if any) or the end of the input buffer. 00732 */ 00733 while(in < end && !is_slash(*in)) 00734 *out++ = *in++; 00735 if(in == end) 00736 break; 00737 *out++ = *in++; 00738 00739 while(in < end) { 00740 if(*in != '.') 00741 break; 00742 00743 /* Handle ending "/." */ 00744 if(in + 1 == end) { 00745 ++in; 00746 break; 00747 } 00748 00749 /* Handle "/./" */ 00750 if(is_slash(in[1])) { 00751 in += 2; 00752 continue; 00753 } 00754 00755 /* If we don't have "/../" or ending "/.." */ 00756 if(in[1] != '.' || (in + 2 != end && !is_slash(in[2]))) 00757 break; 00758 00759 /* Find the slash preceding out pointer and move out pointer to it */ 00760 if(out > path+1 && is_slash(*--out)) 00761 --out; 00762 while(out > path && !is_slash(*(--out))); 00763 if(is_slash(*out)) 00764 ++out; 00765 in += 2; 00766 if(in != end) 00767 ++in; 00768 } 00769 } 00770 00771 len = out - path; 00772 TRACE("(%p %d): Path after dot segments removed %s len=%d\n", path, path_len, 00773 debugstr_wn(path, len), len); 00774 return len; 00775 } 00776 00777 /* Attempts to find the file extension in a given path. 
*/ 00778 static INT find_file_extension(const WCHAR *path, DWORD path_len) { 00779 const WCHAR *end; 00780 00781 for(end = path+path_len-1; end >= path && *end != '/' && *end != '\\'; --end) { 00782 if(*end == '.') 00783 return end-path; 00784 } 00785 00786 return -1; 00787 } 00788 00789 /* Computes the location where the elision should occur in the IPv6 00790 * address using the numerical values of each component stored in 00791 * 'values'. If the address shouldn't contain an elision then 'index' 00792 * is assigned -1 as it's value. Otherwise 'index' will contain the 00793 * starting index (into values) where the elision should be, and 'count' 00794 * will contain the number of cells the elision covers. 00795 * 00796 * NOTES: 00797 * Windows will expand an elision if the elision only represents 1 h16 00798 * component of the address. 00799 * 00800 * Ex: [1::2:3:4:5:6:7] -> [1:0:2:3:4:5:6:7] 00801 * 00802 * If the IPv6 address contains an IPv4 address, the IPv4 address is also 00803 * considered for being included as part of an elision if all its components 00804 * are zeros. 00805 * 00806 * Ex: [1:2:3:4:5:6:0.0.0.0] -> [1:2:3:4:5:6::] 00807 */ 00808 static void compute_elision_location(const ipv6_address *address, const USHORT values[8], 00809 INT *index, DWORD *count) { 00810 DWORD i, max_len, cur_len; 00811 INT max_index, cur_index; 00812 00813 max_len = cur_len = 0; 00814 max_index = cur_index = -1; 00815 for(i = 0; i < 8; ++i) { 00816 BOOL check_ipv4 = (address->ipv4 && i == 6); 00817 BOOL is_end = (check_ipv4 || i == 7); 00818 00819 if(check_ipv4) { 00820 /* Check if the IPv4 address contains only zeros. 
*/ 00821 if(values[i] == 0 && values[i+1] == 0) { 00822 if(cur_index == -1) 00823 cur_index = i; 00824 00825 cur_len += 2; 00826 ++i; 00827 } 00828 } else if(values[i] == 0) { 00829 if(cur_index == -1) 00830 cur_index = i; 00831 00832 ++cur_len; 00833 } 00834 00835 if(is_end || values[i] != 0) { 00836 /* We only consider it for an elision if it's 00837 * more than 1 component long. 00838 */ 00839 if(cur_len > 1 && cur_len > max_len) { 00840 /* Found the new elision location. */ 00841 max_len = cur_len; 00842 max_index = cur_index; 00843 } 00844 00845 /* Reset the current range for the next range of zeros. */ 00846 cur_index = -1; 00847 cur_len = 0; 00848 } 00849 } 00850 00851 *index = max_index; 00852 *count = max_len; 00853 } 00854 00855 /* Removes all the leading and trailing white spaces or 00856 * control characters from the URI and removes all control 00857 * characters inside of the URI string. 00858 */ 00859 static BSTR pre_process_uri(LPCWSTR uri) { 00860 BSTR ret; 00861 DWORD len; 00862 const WCHAR *start, *end; 00863 WCHAR *buf, *ptr; 00864 00865 len = lstrlenW(uri); 00866 00867 start = uri; 00868 /* Skip leading controls and whitespace. */ 00869 while(iscntrlW(*start) || isspaceW(*start)) ++start; 00870 00871 end = uri+len-1; 00872 if(start == end) 00873 /* URI consisted only of control/whitespace. */ 00874 ret = SysAllocStringLen(NULL, 0); 00875 else { 00876 while(iscntrlW(*end) || isspaceW(*end)) --end; 00877 00878 buf = heap_alloc(((end+1)-start)*sizeof(WCHAR)); 00879 if(!buf) 00880 return NULL; 00881 00882 for(ptr = buf; start < end+1; ++start) { 00883 if(!iscntrlW(*start)) 00884 *ptr++ = *start; 00885 } 00886 00887 ret = SysAllocStringLen(buf, ptr-buf); 00888 heap_free(buf); 00889 } 00890 00891 return ret; 00892 } 00893 00894 /* Converts the specified IPv4 address into an uint value. 00895 * 00896 * This function assumes that the IPv4 address has already been validated. 
00897 */ 00898 static UINT ipv4toui(const WCHAR *ip, DWORD len) { 00899 UINT ret = 0; 00900 DWORD comp_value = 0; 00901 const WCHAR *ptr; 00902 00903 for(ptr = ip; ptr < ip+len; ++ptr) { 00904 if(*ptr == '.') { 00905 ret <<= 8; 00906 ret += comp_value; 00907 comp_value = 0; 00908 } else 00909 comp_value = comp_value*10 + (*ptr-'0'); 00910 } 00911 00912 ret <<= 8; 00913 ret += comp_value; 00914 00915 return ret; 00916 } 00917 00918 /* Converts an IPv4 address in numerical form into it's fully qualified 00919 * string form. This function returns the number of characters written 00920 * to 'dest'. If 'dest' is NULL this function will return the number of 00921 * characters that would have been written. 00922 * 00923 * It's up to the caller to ensure there's enough space in 'dest' for the 00924 * address. 00925 */ 00926 static DWORD ui2ipv4(WCHAR *dest, UINT address) { 00927 static const WCHAR formatW[] = 00928 {'%','u','.','%','u','.','%','u','.','%','u',0}; 00929 DWORD ret = 0; 00930 UCHAR digits[4]; 00931 00932 digits[0] = (address >> 24) & 0xff; 00933 digits[1] = (address >> 16) & 0xff; 00934 digits[2] = (address >> 8) & 0xff; 00935 digits[3] = address & 0xff; 00936 00937 if(!dest) { 00938 WCHAR tmp[16]; 00939 ret = sprintfW(tmp, formatW, digits[0], digits[1], digits[2], digits[3]); 00940 } else 00941 ret = sprintfW(dest, formatW, digits[0], digits[1], digits[2], digits[3]); 00942 00943 return ret; 00944 } 00945 00946 static DWORD ui2str(WCHAR *dest, UINT value) { 00947 static const WCHAR formatW[] = {'%','u',0}; 00948 DWORD ret = 0; 00949 00950 if(!dest) { 00951 WCHAR tmp[11]; 00952 ret = sprintfW(tmp, formatW, value); 00953 } else 00954 ret = sprintfW(dest, formatW, value); 00955 00956 return ret; 00957 } 00958 00959 /* Converts an h16 component (from an IPv6 address) into it's 00960 * numerical value. 00961 * 00962 * This function assumes that the h16 component has already been validated. 
00963 */ 00964 static USHORT h16tous(h16 component) { 00965 DWORD i; 00966 USHORT ret = 0; 00967 00968 for(i = 0; i < component.len; ++i) { 00969 ret <<= 4; 00970 ret += hex_to_int(component.str[i]); 00971 } 00972 00973 return ret; 00974 } 00975 00976 /* Converts an IPv6 address into its 128 bits (16 bytes) numerical value. 00977 * 00978 * This function assumes that the ipv6_address has already been validated. 00979 */ 00980 static BOOL ipv6_to_number(const ipv6_address *address, USHORT number[8]) { 00981 DWORD i, cur_component = 0; 00982 BOOL already_passed_elision = FALSE; 00983 00984 for(i = 0; i < address->h16_count; ++i) { 00985 if(address->elision) { 00986 if(address->components[i].str > address->elision && !already_passed_elision) { 00987 /* Means we just passed the elision and need to add its values to 00988 * 'number' before we do anything else. 00989 */ 00990 DWORD j = 0; 00991 for(j = 0; j < address->elision_size; j+=2) 00992 number[cur_component++] = 0; 00993 00994 already_passed_elision = TRUE; 00995 } 00996 } 00997 00998 number[cur_component++] = h16tous(address->components[i]); 00999 } 01000 01001 /* Case when the elision appears after the h16 components. */ 01002 if(!already_passed_elision && address->elision) { 01003 for(i = 0; i < address->elision_size; i+=2) 01004 number[cur_component++] = 0; 01005 } 01006 01007 if(address->ipv4) { 01008 UINT value = ipv4toui(address->ipv4, address->ipv4_len); 01009 01010 if(cur_component != 6) { 01011 ERR("(%p %p): Failed sanity check with %d\n", address, number, cur_component); 01012 return FALSE; 01013 } 01014 01015 number[cur_component++] = (value >> 16) & 0xffff; 01016 number[cur_component] = value & 0xffff; 01017 } 01018 01019 return TRUE; 01020 } 01021 01022 /* Checks if the characters pointed to by 'ptr' are 01023 * a percent encoded data octet. 
01024 * 01025 * pct-encoded = "%" HEXDIG HEXDIG 01026 */ 01027 static BOOL check_pct_encoded(const WCHAR **ptr) { 01028 const WCHAR *start = *ptr; 01029 01030 if(**ptr != '%') 01031 return FALSE; 01032 01033 ++(*ptr); 01034 if(!is_hexdigit(**ptr)) { 01035 *ptr = start; 01036 return FALSE; 01037 } 01038 01039 ++(*ptr); 01040 if(!is_hexdigit(**ptr)) { 01041 *ptr = start; 01042 return FALSE; 01043 } 01044 01045 ++(*ptr); 01046 return TRUE; 01047 } 01048 01049 /* dec-octet = DIGIT ; 0-9 01050 * / %x31-39 DIGIT ; 10-99 01051 * / "1" 2DIGIT ; 100-199 01052 * / "2" %x30-34 DIGIT ; 200-249 01053 * / "25" %x30-35 ; 250-255 01054 */ 01055 static BOOL check_dec_octet(const WCHAR **ptr) { 01056 const WCHAR *c1, *c2, *c3; 01057 01058 c1 = *ptr; 01059 /* A dec-octet must be at least 1 digit long. */ 01060 if(*c1 < '0' || *c1 > '9') 01061 return FALSE; 01062 01063 ++(*ptr); 01064 01065 c2 = *ptr; 01066 /* Since the 1 digit requirment was meet, it doesn't 01067 * matter if this is a DIGIT value, it's considered a 01068 * dec-octet. 01069 */ 01070 if(*c2 < '0' || *c2 > '9') 01071 return TRUE; 01072 01073 ++(*ptr); 01074 01075 c3 = *ptr; 01076 /* Same explanation as above. */ 01077 if(*c3 < '0' || *c3 > '9') 01078 return TRUE; 01079 01080 /* Anything > 255 isn't a valid IP dec-octet. */ 01081 if(*c1 >= '2' && *c2 >= '5' && *c3 >= '5') { 01082 *ptr = c1; 01083 return FALSE; 01084 } 01085 01086 ++(*ptr); 01087 return TRUE; 01088 } 01089 01090 /* Checks if there is an implicit IPv4 address in the host component of the URI. 01091 * The max value of an implicit IPv4 address is UINT_MAX. 01092 * 01093 * Ex: 01094 * "234567" would be considered an implicit IPv4 address. 
01095 */ 01096 static BOOL check_implicit_ipv4(const WCHAR **ptr, UINT *val) { 01097 const WCHAR *start = *ptr; 01098 ULONGLONG ret = 0; 01099 *val = 0; 01100 01101 while(is_num(**ptr)) { 01102 ret = ret*10 + (**ptr - '0'); 01103 01104 if(ret > UINT_MAX) { 01105 *ptr = start; 01106 return FALSE; 01107 } 01108 ++(*ptr); 01109 } 01110 01111 if(*ptr == start) 01112 return FALSE; 01113 01114 *val = ret; 01115 return TRUE; 01116 } 01117 01118 /* Checks if the string contains an IPv4 address. 01119 * 01120 * This function has a strict mode or a non-strict mode of operation 01121 * When 'strict' is set to FALSE this function will return TRUE if 01122 * the string contains at least 'dec-octet "." dec-octet' since partial 01123 * IPv4 addresses will be normalized out into full IPv4 addresses. When 01124 * 'strict' is set this function expects there to be a full IPv4 address. 01125 * 01126 * IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet 01127 */ 01128 static BOOL check_ipv4address(const WCHAR **ptr, BOOL strict) { 01129 const WCHAR *start = *ptr; 01130 01131 if(!check_dec_octet(ptr)) { 01132 *ptr = start; 01133 return FALSE; 01134 } 01135 01136 if(**ptr != '.') { 01137 *ptr = start; 01138 return FALSE; 01139 } 01140 01141 ++(*ptr); 01142 if(!check_dec_octet(ptr)) { 01143 *ptr = start; 01144 return FALSE; 01145 } 01146 01147 if(**ptr != '.') { 01148 if(strict) { 01149 *ptr = start; 01150 return FALSE; 01151 } else 01152 return TRUE; 01153 } 01154 01155 ++(*ptr); 01156 if(!check_dec_octet(ptr)) { 01157 *ptr = start; 01158 return FALSE; 01159 } 01160 01161 if(**ptr != '.') { 01162 if(strict) { 01163 *ptr = start; 01164 return FALSE; 01165 } else 01166 return TRUE; 01167 } 01168 01169 ++(*ptr); 01170 if(!check_dec_octet(ptr)) { 01171 *ptr = start; 01172 return FALSE; 01173 } 01174 01175 /* Found a four digit ip address. */ 01176 return TRUE; 01177 } 01178 /* Tries to parse the scheme name of the URI. 
01179 * 01180 * scheme = ALPHA *(ALPHA | NUM | '+' | '-' | '.') as defined by RFC 3896. 01181 * NOTE: Windows accepts a number as the first character of a scheme. 01182 */ 01183 static BOOL parse_scheme_name(const WCHAR **ptr, parse_data *data, DWORD extras) { 01184 const WCHAR *start = *ptr; 01185 01186 data->scheme = NULL; 01187 data->scheme_len = 0; 01188 01189 while(**ptr) { 01190 if(**ptr == '*' && *ptr == start) { 01191 /* Might have found a wildcard scheme. If it is the next 01192 * char has to be a ':' for it to be a valid URI 01193 */ 01194 ++(*ptr); 01195 break; 01196 } else if(!is_num(**ptr) && !is_alpha(**ptr) && **ptr != '+' && 01197 **ptr != '-' && **ptr != '.') 01198 break; 01199 01200 (*ptr)++; 01201 } 01202 01203 if(*ptr == start) 01204 return FALSE; 01205 01206 /* Schemes must end with a ':' */ 01207 if(**ptr != ':' && !((extras & ALLOW_NULL_TERM_SCHEME) && !**ptr)) { 01208 *ptr = start; 01209 return FALSE; 01210 } 01211 01212 data->scheme = start; 01213 data->scheme_len = *ptr - start; 01214 01215 ++(*ptr); 01216 return TRUE; 01217 } 01218 01219 /* Tries to deduce the corresponding URL_SCHEME for the given URI. Stores 01220 * the deduced URL_SCHEME in data->scheme_type. 01221 */ 01222 static BOOL parse_scheme_type(parse_data *data) { 01223 /* If there's scheme data then see if it's a recognized scheme. */ 01224 if(data->scheme && data->scheme_len) { 01225 DWORD i; 01226 01227 for(i = 0; i < sizeof(recognized_schemes)/sizeof(recognized_schemes[0]); ++i) { 01228 if(lstrlenW(recognized_schemes[i].scheme_name) == data->scheme_len) { 01229 /* Has to be a case insensitive compare. */ 01230 if(!StrCmpNIW(recognized_schemes[i].scheme_name, data->scheme, data->scheme_len)) { 01231 data->scheme_type = recognized_schemes[i].scheme; 01232 return TRUE; 01233 } 01234 } 01235 } 01236 01237 /* If we get here it means it's not a recognized scheme. 
*/ 01238 data->scheme_type = URL_SCHEME_UNKNOWN; 01239 return TRUE; 01240 } else if(data->is_relative) { 01241 /* Relative URI's have no scheme. */ 01242 data->scheme_type = URL_SCHEME_UNKNOWN; 01243 return TRUE; 01244 } else { 01245 /* Should never reach here! what happened... */ 01246 FIXME("(%p): Unable to determine scheme type for URI %s\n", data, debugstr_w(data->uri)); 01247 return FALSE; 01248 } 01249 } 01250 01251 /* Tries to parse (or deduce) the scheme_name of a URI. If it can't 01252 * parse a scheme from the URI it will try to deduce the scheme_name and scheme_type 01253 * using the flags specified in 'flags' (if any). Flags that affect how this function 01254 * operates are the Uri_CREATE_ALLOW_* flags. 01255 * 01256 * All parsed/deduced information will be stored in 'data' when the function returns. 01257 * 01258 * Returns TRUE if it was able to successfully parse the information. 01259 */ 01260 static BOOL parse_scheme(const WCHAR **ptr, parse_data *data, DWORD flags, DWORD extras) { 01261 static const WCHAR fileW[] = {'f','i','l','e',0}; 01262 static const WCHAR wildcardW[] = {'*',0}; 01263 01264 /* First check to see if the uri could implicitly be a file path. */ 01265 if(is_implicit_file_path(*ptr)) { 01266 if(flags & Uri_CREATE_ALLOW_IMPLICIT_FILE_SCHEME) { 01267 data->scheme = fileW; 01268 data->scheme_len = lstrlenW(fileW); 01269 data->has_implicit_scheme = TRUE; 01270 01271 TRACE("(%p %p %x): URI is an implicit file path.\n", ptr, data, flags); 01272 } else { 01273 /* Window's does not consider anything that can implicitly be a file 01274 * path to be a valid URI if the ALLOW_IMPLICIT_FILE_SCHEME flag is not set... 
01275 */ 01276 TRACE("(%p %p %x): URI is implicitly a file path, but, the ALLOW_IMPLICIT_FILE_SCHEME flag wasn't set.\n", 01277 ptr, data, flags); 01278 return FALSE; 01279 } 01280 } else if(!parse_scheme_name(ptr, data, extras)) { 01281 /* No Scheme was found, this means it could be: 01282 * a) an implicit Wildcard scheme 01283 * b) a relative URI 01284 * c) a invalid URI. 01285 */ 01286 if(flags & Uri_CREATE_ALLOW_IMPLICIT_WILDCARD_SCHEME) { 01287 data->scheme = wildcardW; 01288 data->scheme_len = lstrlenW(wildcardW); 01289 data->has_implicit_scheme = TRUE; 01290 01291 TRACE("(%p %p %x): URI is an implicit wildcard scheme.\n", ptr, data, flags); 01292 } else if (flags & Uri_CREATE_ALLOW_RELATIVE) { 01293 data->is_relative = TRUE; 01294 TRACE("(%p %p %x): URI is relative.\n", ptr, data, flags); 01295 } else { 01296 TRACE("(%p %p %x): Malformed URI found. Unable to deduce scheme name.\n", ptr, data, flags); 01297 return FALSE; 01298 } 01299 } 01300 01301 if(!data->is_relative) 01302 TRACE("(%p %p %x): Found scheme=%s scheme_len=%d\n", ptr, data, flags, 01303 debugstr_wn(data->scheme, data->scheme_len), data->scheme_len); 01304 01305 if(!parse_scheme_type(data)) 01306 return FALSE; 01307 01308 TRACE("(%p %p %x): Assigned %d as the URL_SCHEME.\n", ptr, data, flags, data->scheme_type); 01309 return TRUE; 01310 } 01311 01312 static BOOL parse_username(const WCHAR **ptr, parse_data *data, DWORD flags, DWORD extras) { 01313 data->username = *ptr; 01314 01315 while(**ptr != ':' && **ptr != '@') { 01316 if(**ptr == '%') { 01317 if(!check_pct_encoded(ptr)) { 01318 if(data->scheme_type != URL_SCHEME_UNKNOWN) { 01319 *ptr = data->username; 01320 data->username = NULL; 01321 return FALSE; 01322 } 01323 } else 01324 continue; 01325 } else if(extras & ALLOW_NULL_TERM_USER_NAME && !**ptr) 01326 break; 01327 else if(is_auth_delim(**ptr, data->scheme_type != URL_SCHEME_UNKNOWN)) { 01328 *ptr = data->username; 01329 data->username = NULL; 01330 return FALSE; 01331 } 01332 01333 
++(*ptr); 01334 } 01335 01336 data->username_len = *ptr - data->username; 01337 return TRUE; 01338 } 01339 01340 static BOOL parse_password(const WCHAR **ptr, parse_data *data, DWORD flags, DWORD extras) { 01341 data->password = *ptr; 01342 01343 while(**ptr != '@') { 01344 if(**ptr == '%') { 01345 if(!check_pct_encoded(ptr)) { 01346 if(data->scheme_type != URL_SCHEME_UNKNOWN) { 01347 *ptr = data->password; 01348 data->password = NULL; 01349 return FALSE; 01350 } 01351 } else 01352 continue; 01353 } else if(extras & ALLOW_NULL_TERM_PASSWORD && !**ptr) 01354 break; 01355 else if(is_auth_delim(**ptr, data->scheme_type != URL_SCHEME_UNKNOWN)) { 01356 *ptr = data->password; 01357 data->password = NULL; 01358 return FALSE; 01359 } 01360 01361 ++(*ptr); 01362 } 01363 01364 data->password_len = *ptr - data->password; 01365 return TRUE; 01366 } 01367 01368 /* Parses the userinfo part of the URI (if it exists). The userinfo field of 01369 * a URI can consist of "username:password@", or just "username@". 01370 * 01371 * RFC def: 01372 * userinfo = *( unreserved / pct-encoded / sub-delims / ":" ) 01373 * 01374 * NOTES: 01375 * 1) If there is more than one ':' in the userinfo part of the URI Windows 01376 * uses the first occurrence of ':' to delimit the username and password 01377 * components. 01378 * 01379 * ex: 01380 * ftp://user:pass:word@winehq.org 01381 * 01382 * Would yield, "user" as the username and "pass:word" as the password. 01383 * 01384 * 2) Windows allows any character to appear in the "userinfo" part of 01385 * a URI, as long as it's not an authority delimeter character set. 
01386 */ 01387 static void parse_userinfo(const WCHAR **ptr, parse_data *data, DWORD flags) { 01388 const WCHAR *start = *ptr; 01389 01390 if(!parse_username(ptr, data, flags, 0)) { 01391 TRACE("(%p %p %x): URI contained no userinfo.\n", ptr, data, flags); 01392 return; 01393 } 01394 01395 if(**ptr == ':') { 01396 ++(*ptr); 01397 if(!parse_password(ptr, data, flags, 0)) { 01398 *ptr = start; 01399 data->username = NULL; 01400 data->username_len = 0; 01401 TRACE("(%p %p %x): URI contained no userinfo.\n", ptr, data, flags); 01402 return; 01403 } 01404 } 01405 01406 if(**ptr != '@') { 01407 *ptr = start; 01408 data->username = NULL; 01409 data->username_len = 0; 01410 data->password = NULL; 01411 data->password_len = 0; 01412 01413 TRACE("(%p %p %x): URI contained no userinfo.\n", ptr, data, flags); 01414 return; 01415 } 01416 01417 if(data->username) 01418 TRACE("(%p %p %x): Found username %s len=%d.\n", ptr, data, flags, 01419 debugstr_wn(data->username, data->username_len), data->username_len); 01420 01421 if(data->password) 01422 TRACE("(%p %p %x): Found password %s len=%d.\n", ptr, data, flags, 01423 debugstr_wn(data->password, data->password_len), data->password_len); 01424 01425 ++(*ptr); 01426 } 01427 01428 /* Attempts to parse a port from the URI. 01429 * 01430 * NOTES: 01431 * Windows seems to have a cap on what the maximum value 01432 * for a port can be. The max value is USHORT_MAX. 
01433 * 01434 * port = *DIGIT 01435 */ 01436 static BOOL parse_port(const WCHAR **ptr, parse_data *data, DWORD flags) { 01437 UINT port = 0; 01438 data->port = *ptr; 01439 01440 while(!is_auth_delim(**ptr, data->scheme_type != URL_SCHEME_UNKNOWN)) { 01441 if(!is_num(**ptr)) { 01442 *ptr = data->port; 01443 data->port = NULL; 01444 return FALSE; 01445 } 01446 01447 port = port*10 + (**ptr-'0'); 01448 01449 if(port > USHORT_MAX) { 01450 *ptr = data->port; 01451 data->port = NULL; 01452 return FALSE; 01453 } 01454 01455 ++(*ptr); 01456 } 01457 01458 data->has_port = TRUE; 01459 data->port_value = port; 01460 data->port_len = *ptr - data->port; 01461 01462 TRACE("(%p %p %x): Found port %s len=%d value=%u\n", ptr, data, flags, 01463 debugstr_wn(data->port, data->port_len), data->port_len, data->port_value); 01464 return TRUE; 01465 } 01466 01467 /* Attempts to parse a IPv4 address from the URI. 01468 * 01469 * NOTES: 01470 * Window's normalizes IPv4 addresses, This means there's three 01471 * possibilities for the URI to contain an IPv4 address. 01472 * 1) A well formed address (ex. 192.2.2.2). 01473 * 2) A partially formed address. For example "192.0" would 01474 * normalize to "192.0.0.0" during canonicalization. 01475 * 3) An implicit IPv4 address. For example "256" would 01476 * normalize to "0.0.1.0" during canonicalization. Also 01477 * note that the maximum value for an implicit IP address 01478 * is UINT_MAX, if the value in the URI exceeds this then 01479 * it is not considered an IPv4 address. 
01480 */ 01481 static BOOL parse_ipv4address(const WCHAR **ptr, parse_data *data, DWORD flags) { 01482 const BOOL is_unknown = data->scheme_type == URL_SCHEME_UNKNOWN; 01483 data->host = *ptr; 01484 01485 if(!check_ipv4address(ptr, FALSE)) { 01486 if(!check_implicit_ipv4(ptr, &data->implicit_ipv4)) { 01487 TRACE("(%p %p %x): URI didn't contain anything looking like an IPv4 address.\n", 01488 ptr, data, flags); 01489 *ptr = data->host; 01490 data->host = NULL; 01491 return FALSE; 01492 } else 01493 data->has_implicit_ip = TRUE; 01494 } 01495 01496 /* Check if what we found is the only part of the host name (if it isn't 01497 * we don't have an IPv4 address). 01498 */ 01499 if(**ptr == ':') { 01500 ++(*ptr); 01501 if(!parse_port(ptr, data, flags)) { 01502 *ptr = data->host; 01503 data->host = NULL; 01504 return FALSE; 01505 } 01506 } else if(!is_auth_delim(**ptr, !is_unknown)) { 01507 /* Found more data which belongs the host, so this isn't an IPv4. */ 01508 *ptr = data->host; 01509 data->host = NULL; 01510 data->has_implicit_ip = FALSE; 01511 return FALSE; 01512 } 01513 01514 data->host_len = *ptr - data->host; 01515 data->host_type = Uri_HOST_IPV4; 01516 01517 TRACE("(%p %p %x): IPv4 address found. host=%s host_len=%d host_type=%d\n", 01518 ptr, data, flags, debugstr_wn(data->host, data->host_len), 01519 data->host_len, data->host_type); 01520 return TRUE; 01521 } 01522 01523 /* Attempts to parse the reg-name from the URI. 01524 * 01525 * Because of the way Windows handles ':' this function also 01526 * handles parsing the port. 01527 * 01528 * reg-name = *( unreserved / pct-encoded / sub-delims ) 01529 * 01530 * NOTE: 01531 * Windows allows everything, but, the characters in "auth_delims" and ':' 01532 * to appear in a reg-name, unless it's an unknown scheme type then ':' is 01533 * allowed to appear (even if a valid port isn't after it). 
01534 * 01535 * Windows doesn't like host names which start with '[' and end with ']' 01536 * and don't contain a valid IP literal address in between them. 01537 * 01538 * On Windows if an '[' is encountered in the host name the ':' no longer 01539 * counts as a delimiter until you reach the next ']' or an "authority delimeter". 01540 * 01541 * A reg-name CAN be empty. 01542 */ 01543 static BOOL parse_reg_name(const WCHAR **ptr, parse_data *data, DWORD flags, DWORD extras) { 01544 const BOOL has_start_bracket = **ptr == '['; 01545 const BOOL known_scheme = data->scheme_type != URL_SCHEME_UNKNOWN; 01546 const BOOL is_res = data->scheme_type == URL_SCHEME_RES; 01547 BOOL inside_brackets = has_start_bracket; 01548 01549 /* res URIs don't have ports. */ 01550 BOOL ignore_col = (extras & IGNORE_PORT_DELIMITER) || is_res; 01551 01552 /* We have to be careful with file schemes. */ 01553 if(data->scheme_type == URL_SCHEME_FILE) { 01554 /* This is because an implicit file scheme could be "C:\\test" and it 01555 * would trick this function into thinking the host is "C", when after 01556 * canonicalization the host would end up being an empty string. A drive 01557 * path can also have a '|' instead of a ':' after the drive letter. 01558 */ 01559 if(is_drive_path(*ptr)) { 01560 /* Regular old drive paths don't have a host type (or host name). */ 01561 data->host_type = Uri_HOST_UNKNOWN; 01562 data->host = *ptr; 01563 data->host_len = 0; 01564 return TRUE; 01565 } else if(is_unc_path(*ptr)) 01566 /* Skip past the "\\" of a UNC path. */ 01567 *ptr += 2; 01568 } 01569 01570 data->host = *ptr; 01571 01572 /* For res URIs, everything before the first '/' is 01573 * considered the host. 
01574 */ 01575 while((!is_res && !is_auth_delim(**ptr, known_scheme)) || 01576 (is_res && **ptr && **ptr != '/')) { 01577 if(**ptr == ':' && !ignore_col) { 01578 /* We can ignore ':' if were inside brackets.*/ 01579 if(!inside_brackets) { 01580 const WCHAR *tmp = (*ptr)++; 01581 01582 /* Attempt to parse the port. */ 01583 if(!parse_port(ptr, data, flags)) { 01584 /* Windows expects there to be a valid port for known scheme types. */ 01585 if(data->scheme_type != URL_SCHEME_UNKNOWN) { 01586 *ptr = data->host; 01587 data->host = NULL; 01588 TRACE("(%p %p %x %x): Expected valid port\n", ptr, data, flags, extras); 01589 return FALSE; 01590 } else 01591 /* Windows gives up on trying to parse a port when it 01592 * encounters 1 invalid port. 01593 */ 01594 ignore_col = TRUE; 01595 } else { 01596 data->host_len = tmp - data->host; 01597 break; 01598 } 01599 } 01600 } else if(**ptr == '%' && (known_scheme && !is_res)) { 01601 /* Has to be a legit % encoded value. */ 01602 if(!check_pct_encoded(ptr)) { 01603 *ptr = data->host; 01604 data->host = NULL; 01605 return FALSE; 01606 } else 01607 continue; 01608 } else if(is_res && is_forbidden_dos_path_char(**ptr)) { 01609 *ptr = data->host; 01610 data->host = NULL; 01611 return FALSE; 01612 } else if(**ptr == ']') 01613 inside_brackets = FALSE; 01614 else if(**ptr == '[') 01615 inside_brackets = TRUE; 01616 01617 ++(*ptr); 01618 } 01619 01620 if(has_start_bracket) { 01621 /* Make sure the last character of the host wasn't a ']'. */ 01622 if(*(*ptr-1) == ']') { 01623 TRACE("(%p %p %x %x): Expected an IP literal inside of the host\n", 01624 ptr, data, flags, extras); 01625 *ptr = data->host; 01626 data->host = NULL; 01627 return FALSE; 01628 } 01629 } 01630 01631 /* Don't overwrite our length if we found a port earlier. */ 01632 if(!data->port) 01633 data->host_len = *ptr - data->host; 01634 01635 /* If the host is empty, then it's an unknown host type. 
*/ 01636 if(data->host_len == 0 || is_res) 01637 data->host_type = Uri_HOST_UNKNOWN; 01638 else 01639 data->host_type = Uri_HOST_DNS; 01640 01641 TRACE("(%p %p %x %x): Parsed reg-name. host=%s len=%d\n", ptr, data, flags, extras, 01642 debugstr_wn(data->host, data->host_len), data->host_len); 01643 return TRUE; 01644 } 01645 01646 /* Attempts to parse an IPv6 address out of the URI. 01647 * 01648 * IPv6address = 6( h16 ":" ) ls32 01649 * / "::" 5( h16 ":" ) ls32 01650 * / [ h16 ] "::" 4( h16 ":" ) ls32 01651 * / [ *1( h16 ":" ) h16 ] "::" 3( h16 ":" ) ls32 01652 * / [ *2( h16 ":" ) h16 ] "::" 2( h16 ":" ) ls32 01653 * / [ *3( h16 ":" ) h16 ] "::" h16 ":" ls32 01654 * / [ *4( h16 ":" ) h16 ] "::" ls32 01655 * / [ *5( h16 ":" ) h16 ] "::" h16 01656 * / [ *6( h16 ":" ) h16 ] "::" 01657 * 01658 * ls32 = ( h16 ":" h16 ) / IPv4address 01659 * ; least-significant 32 bits of address. 01660 * 01661 * h16 = 1*4HEXDIG 01662 * ; 16 bits of address represented in hexadecimal. 01663 * 01664 * Modeled after google-url's 'DoParseIPv6' function. 01665 */ 01666 static BOOL parse_ipv6address(const WCHAR **ptr, parse_data *data, DWORD flags) { 01667 const WCHAR *start, *cur_start; 01668 ipv6_address ip; 01669 01670 start = cur_start = *ptr; 01671 memset(&ip, 0, sizeof(ipv6_address)); 01672 01673 for(;; ++(*ptr)) { 01674 /* Check if we're on the last character of the host. */ 01675 BOOL is_end = (is_auth_delim(**ptr, data->scheme_type != URL_SCHEME_UNKNOWN) 01676 || **ptr == ']'); 01677 01678 BOOL is_split = (**ptr == ':'); 01679 BOOL is_elision = (is_split && !is_end && *(*ptr+1) == ':'); 01680 01681 /* Check if we're at the end of a component, or 01682 * if we're at the end of the IPv6 address. 01683 */ 01684 if(is_split || is_end) { 01685 DWORD cur_len = 0; 01686 01687 cur_len = *ptr - cur_start; 01688 01689 /* h16 can't have a length > 4. 
*/ 01690 if(cur_len > 4) { 01691 *ptr = start; 01692 01693 TRACE("(%p %p %x): h16 component to long.\n", 01694 ptr, data, flags); 01695 return FALSE; 01696 } 01697 01698 if(cur_len == 0) { 01699 /* An h16 component can't have the length of 0 unless 01700 * the elision is at the beginning of the address, or 01701 * at the end of the address. 01702 */ 01703 if(!((*ptr == start && is_elision) || 01704 (is_end && (*ptr-2) == ip.elision))) { 01705 *ptr = start; 01706 TRACE("(%p %p %x): IPv6 component cannot have a length of 0.\n", 01707 ptr, data, flags); 01708 return FALSE; 01709 } 01710 } 01711 01712 if(cur_len > 0) { 01713 /* An IPv6 address can have no more than 8 h16 components. */ 01714 if(ip.h16_count >= 8) { 01715 *ptr = start; 01716 TRACE("(%p %p %x): Not a IPv6 address, to many h16 components.\n", 01717 ptr, data, flags); 01718 return FALSE; 01719 } 01720 01721 ip.components[ip.h16_count].str = cur_start; 01722 ip.components[ip.h16_count].len = cur_len; 01723 01724 TRACE("(%p %p %x): Found h16 component %s, len=%d, h16_count=%d\n", 01725 ptr, data, flags, debugstr_wn(cur_start, cur_len), cur_len, 01726 ip.h16_count); 01727 ++ip.h16_count; 01728 } 01729 } 01730 01731 if(is_end) 01732 break; 01733 01734 if(is_elision) { 01735 /* A IPv6 address can only have 1 elision ('::'). */ 01736 if(ip.elision) { 01737 *ptr = start; 01738 01739 TRACE("(%p %p %x): IPv6 address cannot have 2 elisions.\n", 01740 ptr, data, flags); 01741 return FALSE; 01742 } 01743 01744 ip.elision = *ptr; 01745 ++(*ptr); 01746 } 01747 01748 if(is_split) 01749 cur_start = *ptr+1; 01750 else { 01751 if(!check_ipv4address(ptr, TRUE)) { 01752 if(!is_hexdigit(**ptr)) { 01753 /* Not a valid character for an IPv6 address. */ 01754 *ptr = start; 01755 return FALSE; 01756 } 01757 } else { 01758 /* Found an IPv4 address. 
*/ 01759 ip.ipv4 = cur_start; 01760 ip.ipv4_len = *ptr - cur_start; 01761 01762 TRACE("(%p %p %x): Found an attached IPv4 address %s len=%d.\n", 01763 ptr, data, flags, debugstr_wn(ip.ipv4, ip.ipv4_len), 01764 ip.ipv4_len); 01765 01766 /* IPv4 addresses can only appear at the end of a IPv6. */ 01767 break; 01768 } 01769 } 01770 } 01771 01772 compute_ipv6_comps_size(&ip); 01773 01774 /* Make sure the IPv6 address adds up to 16 bytes. */ 01775 if(ip.components_size + ip.elision_size != 16) { 01776 *ptr = start; 01777 TRACE("(%p %p %x): Invalid IPv6 address, did not add up to 16 bytes.\n", 01778 ptr, data, flags); 01779 return FALSE; 01780 } 01781 01782 if(ip.elision_size == 2) { 01783 /* For some reason on Windows if an elision that represents 01784 * only 1 h16 component is encountered at the very begin or 01785 * end of an IPv6 address, Windows does not consider it a 01786 * valid IPv6 address. 01787 * 01788 * Ex: [::2:3:4:5:6:7] is not valid, even though the sum 01789 * of all the components == 128bits. 01790 */ 01791 if(ip.elision < ip.components[0].str || 01792 ip.elision > ip.components[ip.h16_count-1].str) { 01793 *ptr = start; 01794 TRACE("(%p %p %x): Invalid IPv6 address. Detected elision of 2 bytes at the beginning or end of the address.\n", 01795 ptr, data, flags); 01796 return FALSE; 01797 } 01798 } 01799 01800 data->host_type = Uri_HOST_IPV6; 01801 data->has_ipv6 = TRUE; 01802 data->ipv6_address = ip; 01803 01804 TRACE("(%p %p %x): Found valid IPv6 literal %s len=%d\n", 01805 ptr, data, flags, debugstr_wn(start, *ptr-start), 01806 (int)(*ptr-start)); 01807 return TRUE; 01808 } 01809 01810 /* IPvFuture = "v" 1*HEXDIG "." 1*( unreserved / sub-delims / ":" ) */ 01811 static BOOL parse_ipvfuture(const WCHAR **ptr, parse_data *data, DWORD flags) { 01812 const WCHAR *start = *ptr; 01813 01814 /* IPvFuture has to start with a 'v' or 'V'. 
*/ 01815 if(**ptr != 'v' && **ptr != 'V') 01816 return FALSE; 01817 01818 /* Following the v there must be at least 1 hex digit. */ 01819 ++(*ptr); 01820 if(!is_hexdigit(**ptr)) { 01821 *ptr = start; 01822 return FALSE; 01823 } 01824 01825 ++(*ptr); 01826 while(is_hexdigit(**ptr)) 01827 ++(*ptr); 01828 01829 /* End of the hexdigit sequence must be a '.' */ 01830 if(**ptr != '.') { 01831 *ptr = start; 01832 return FALSE; 01833 } 01834 01835 ++(*ptr); 01836 if(!is_unreserved(**ptr) && !is_subdelim(**ptr) && **ptr != ':') { 01837 *ptr = start; 01838 return FALSE; 01839 } 01840 01841 ++(*ptr); 01842 while(is_unreserved(**ptr) || is_subdelim(**ptr) || **ptr == ':') 01843 ++(*ptr); 01844 01845 data->host_type = Uri_HOST_UNKNOWN; 01846 01847 TRACE("(%p %p %x): Parsed IPvFuture address %s len=%d\n", ptr, data, flags, 01848 debugstr_wn(start, *ptr-start), (int)(*ptr-start)); 01849 01850 return TRUE; 01851 } 01852 01853 /* IP-literal = "[" ( IPv6address / IPvFuture ) "]" */ 01854 static BOOL parse_ip_literal(const WCHAR **ptr, parse_data *data, DWORD flags, DWORD extras) { 01855 data->host = *ptr; 01856 01857 if(**ptr != '[' && !(extras & ALLOW_BRACKETLESS_IP_LITERAL)) { 01858 data->host = NULL; 01859 return FALSE; 01860 } else if(**ptr == '[') 01861 ++(*ptr); 01862 01863 if(!parse_ipv6address(ptr, data, flags)) { 01864 if(extras & SKIP_IP_FUTURE_CHECK || !parse_ipvfuture(ptr, data, flags)) { 01865 *ptr = data->host; 01866 data->host = NULL; 01867 return FALSE; 01868 } 01869 } 01870 01871 if(**ptr != ']' && !(extras & ALLOW_BRACKETLESS_IP_LITERAL)) { 01872 *ptr = data->host; 01873 data->host = NULL; 01874 return FALSE; 01875 } else if(!**ptr && extras & ALLOW_BRACKETLESS_IP_LITERAL) { 01876 /* The IP literal didn't contain brackets and was followed by 01877 * a NULL terminator, so no reason to even check the port. 
01878 */ 01879 data->host_len = *ptr - data->host; 01880 return TRUE; 01881 } 01882 01883 ++(*ptr); 01884 if(**ptr == ':') { 01885 ++(*ptr); 01886 /* If a valid port is not found, then let it trickle down to 01887 * parse_reg_name. 01888 */ 01889 if(!parse_port(ptr, data, flags)) { 01890 *ptr = data->host; 01891 data->host = NULL; 01892 return FALSE; 01893 } 01894 } else 01895 data->host_len = *ptr - data->host; 01896 01897 return TRUE; 01898 } 01899 01900 /* Parses the host information from the URI. 01901 * 01902 * host = IP-literal / IPv4address / reg-name 01903 */ 01904 static BOOL parse_host(const WCHAR **ptr, parse_data *data, DWORD flags, DWORD extras) { 01905 if(!parse_ip_literal(ptr, data, flags, extras)) { 01906 if(!parse_ipv4address(ptr, data, flags)) { 01907 if(!parse_reg_name(ptr, data, flags, extras)) { 01908 TRACE("(%p %p %x %x): Malformed URI, Unknown host type.\n", 01909 ptr, data, flags, extras); 01910 return FALSE; 01911 } 01912 } 01913 } 01914 01915 return TRUE; 01916 } 01917 01918 /* Parses the authority information from the URI. 01919 * 01920 * authority = [ userinfo "@" ] host [ ":" port ] 01921 */ 01922 static BOOL parse_authority(const WCHAR **ptr, parse_data *data, DWORD flags) { 01923 parse_userinfo(ptr, data, flags); 01924 01925 /* Parsing the port will happen during one of the host parsing 01926 * routines (if the URI has a port). 01927 */ 01928 if(!parse_host(ptr, data, flags, 0)) 01929 return FALSE; 01930 01931 return TRUE; 01932 } 01933 01934 /* Attempts to parse the path information of a hierarchical URI. */ 01935 static BOOL parse_path_hierarchical(const WCHAR **ptr, parse_data *data, DWORD flags) { 01936 const WCHAR *start = *ptr; 01937 static const WCHAR slash[] = {'/',0}; 01938 const BOOL is_file = data->scheme_type == URL_SCHEME_FILE; 01939 01940 if(is_path_delim(**ptr)) { 01941 if(data->scheme_type == URL_SCHEME_WILDCARD) { 01942 /* Wildcard schemes don't get a '/' attached if their path is 01943 * empty. 
01944 */ 01945 data->path = NULL; 01946 data->path_len = 0; 01947 } else if(!(flags & Uri_CREATE_NO_CANONICALIZE)) { 01948 /* If the path component is empty, then a '/' is added. */ 01949 data->path = slash; 01950 data->path_len = 1; 01951 } 01952 } else { 01953 while(!is_path_delim(**ptr)) { 01954 if(**ptr == '%' && data->scheme_type != URL_SCHEME_UNKNOWN && !is_file) { 01955 if(!check_pct_encoded(ptr)) { 01956 *ptr = start; 01957 return FALSE; 01958 } else 01959 continue; 01960 } else if(is_forbidden_dos_path_char(**ptr) && is_file && 01961 (flags & Uri_CREATE_FILE_USE_DOS_PATH)) { 01962 /* File schemes with USE_DOS_PATH set aren't allowed to have 01963 * a '<' or '>' or '\"' appear in them. 01964 */ 01965 *ptr = start; 01966 return FALSE; 01967 } else if(**ptr == '\\') { 01968 /* Not allowed to have a backslash if NO_CANONICALIZE is set 01969 * and the scheme is known type (but not a file scheme). 01970 */ 01971 if(flags & Uri_CREATE_NO_CANONICALIZE) { 01972 if(data->scheme_type != URL_SCHEME_FILE && 01973 data->scheme_type != URL_SCHEME_UNKNOWN) { 01974 *ptr = start; 01975 return FALSE; 01976 } 01977 } 01978 } 01979 01980 ++(*ptr); 01981 } 01982 01983 /* The only time a URI doesn't have a path is when 01984 * the NO_CANONICALIZE flag is set and the raw URI 01985 * didn't contain one. 01986 */ 01987 if(*ptr == start) { 01988 data->path = NULL; 01989 data->path_len = 0; 01990 } else { 01991 data->path = start; 01992 data->path_len = *ptr - start; 01993 } 01994 } 01995 01996 if(data->path) 01997 TRACE("(%p %p %x): Parsed path %s len=%d\n", ptr, data, flags, 01998 debugstr_wn(data->path, data->path_len), data->path_len); 01999 else 02000 TRACE("(%p %p %x): The URI contained no path\n", ptr, data, flags); 02001 02002 return TRUE; 02003 } 02004 02005 /* Parses the path of a opaque URI (much less strict then the parser 02006 * for a hierarchical URI). 
02007 * 02008 * NOTE: 02009 * Windows allows invalid % encoded data to appear in opaque URI paths 02010 * for unknown scheme types. 02011 * 02012 * File schemes with USE_DOS_PATH set aren't allowed to have '<', '>', or '\"' 02013 * appear in them. 02014 */ 02015 static BOOL parse_path_opaque(const WCHAR **ptr, parse_data *data, DWORD flags) { 02016 const BOOL known_scheme = data->scheme_type != URL_SCHEME_UNKNOWN; 02017 const BOOL is_file = data->scheme_type == URL_SCHEME_FILE; 02018 02019 data->path = *ptr; 02020 02021 while(!is_path_delim(**ptr)) { 02022 if(**ptr == '%' && known_scheme) { 02023 if(!check_pct_encoded(ptr)) { 02024 *ptr = data->path; 02025 data->path = NULL; 02026 return FALSE; 02027 } else 02028 continue; 02029 } else if(is_forbidden_dos_path_char(**ptr) && is_file && 02030 (flags & Uri_CREATE_FILE_USE_DOS_PATH)) { 02031 *ptr = data->path; 02032 data->path = NULL; 02033 return FALSE; 02034 } 02035 02036 ++(*ptr); 02037 } 02038 02039 data->path_len = *ptr - data->path; 02040 TRACE("(%p %p %x): Parsed opaque URI path %s len=%d\n", ptr, data, flags, 02041 debugstr_wn(data->path, data->path_len), data->path_len); 02042 return TRUE; 02043 } 02044 02045 /* Determines how the URI should be parsed after the scheme information. 02046 * 02047 * If the scheme is followed, by "//" then, it is treated as an hierarchical URI 02048 * which then the authority and path information will be parsed out. Otherwise, the 02049 * URI will be treated as an opaque URI which the authority information is not parsed 02050 * out. 
02051 * 02052 * RFC 3896 definition of hier-part: 02053 * 02054 * hier-part = "//" authority path-abempty 02055 * / path-absolute 02056 * / path-rootless 02057 * / path-empty 02058 * 02059 * MSDN opaque URI definition: 02060 * scheme ":" path [ "#" fragment ] 02061 * 02062 * NOTES: 02063 * If the URI is of an unknown scheme type and has a "//" following the scheme then it 02064 * is treated as a hierarchical URI, but, if the CREATE_NO_CRACK_UNKNOWN_SCHEMES flag is 02065 * set then it is considered an opaque URI reguardless of what follows the scheme information 02066 * (per MSDN documentation). 02067 */ 02068 static BOOL parse_hierpart(const WCHAR **ptr, parse_data *data, DWORD flags) { 02069 const WCHAR *start = *ptr; 02070 02071 /* Checks if the authority information needs to be parsed. */ 02072 if(is_hierarchical_uri(ptr, data)) { 02073 /* Only treat it as a hierarchical URI if the scheme_type is known or 02074 * the Uri_CREATE_NO_CRACK_UNKNOWN_SCHEMES flag is not set. 02075 */ 02076 if(data->scheme_type != URL_SCHEME_UNKNOWN || 02077 !(flags & Uri_CREATE_NO_CRACK_UNKNOWN_SCHEMES)) { 02078 TRACE("(%p %p %x): Treating URI as an hierarchical URI.\n", ptr, data, flags); 02079 data->is_opaque = FALSE; 02080 02081 /* TODO: Handle hierarchical URI's, parse authority then parse the path. */ 02082 if(!parse_authority(ptr, data, flags)) 02083 return FALSE; 02084 02085 return parse_path_hierarchical(ptr, data, flags); 02086 } else 02087 /* Reset ptr to it's starting position so opaque path parsing 02088 * begins at the correct location. 02089 */ 02090 *ptr = start; 02091 } 02092 02093 /* If it reaches here, then the URI will be treated as an opaque 02094 * URI. 02095 */ 02096 02097 TRACE("(%p %p %x): Treating URI as an opaque URI.\n", ptr, data, flags); 02098 02099 data->is_opaque = TRUE; 02100 if(!parse_path_opaque(ptr, data, flags)) 02101 return FALSE; 02102 02103 return TRUE; 02104 } 02105 02106 /* Attempts to parse the query string from the URI. 
02107 * 02108 * NOTES: 02109 * If NO_DECODE_EXTRA_INFO flag is set, then invalid percent encoded 02110 * data is allowed appear in the query string. For unknown scheme types 02111 * invalid percent encoded data is allowed to appear reguardless. 02112 */ 02113 static BOOL parse_query(const WCHAR **ptr, parse_data *data, DWORD flags) { 02114 const BOOL known_scheme = data->scheme_type != URL_SCHEME_UNKNOWN; 02115 02116 if(**ptr != '?') { 02117 TRACE("(%p %p %x): URI didn't contain a query string.\n", ptr, data, flags); 02118 return TRUE; 02119 } 02120 02121 data->query = *ptr; 02122 02123 ++(*ptr); 02124 while(**ptr && **ptr != '#') { 02125 if(**ptr == '%' && known_scheme && 02126 !(flags & Uri_CREATE_NO_DECODE_EXTRA_INFO)) { 02127 if(!check_pct_encoded(ptr)) { 02128 *ptr = data->query; 02129 data->query = NULL; 02130 return FALSE; 02131 } else 02132 continue; 02133 } 02134 02135 ++(*ptr); 02136 } 02137 02138 data->query_len = *ptr - data->query; 02139 02140 TRACE("(%p %p %x): Parsed query string %s len=%d\n", ptr, data, flags, 02141 debugstr_wn(data->query, data->query_len), data->query_len); 02142 return TRUE; 02143 } 02144 02145 /* Attempts to parse the fragment from the URI. 02146 * 02147 * NOTES: 02148 * If NO_DECODE_EXTRA_INFO flag is set, then invalid percent encoded 02149 * data is allowed appear in the query string. For unknown scheme types 02150 * invalid percent encoded data is allowed to appear reguardless. 
02151 */ 02152 static BOOL parse_fragment(const WCHAR **ptr, parse_data *data, DWORD flags) { 02153 const BOOL known_scheme = data->scheme_type != URL_SCHEME_UNKNOWN; 02154 02155 if(**ptr != '#') { 02156 TRACE("(%p %p %x): URI didn't contain a fragment.\n", ptr, data, flags); 02157 return TRUE; 02158 } 02159 02160 data->fragment = *ptr; 02161 02162 ++(*ptr); 02163 while(**ptr) { 02164 if(**ptr == '%' && known_scheme && 02165 !(flags & Uri_CREATE_NO_DECODE_EXTRA_INFO)) { 02166 if(!check_pct_encoded(ptr)) { 02167 *ptr = data->fragment; 02168 data->fragment = NULL; 02169 return FALSE; 02170 } else 02171 continue; 02172 } 02173 02174 ++(*ptr); 02175 } 02176 02177 data->fragment_len = *ptr - data->fragment; 02178 02179 TRACE("(%p %p %x): Parsed fragment %s len=%d\n", ptr, data, flags, 02180 debugstr_wn(data->fragment, data->fragment_len), data->fragment_len); 02181 return TRUE; 02182 } 02183 02184 /* Parses and validates the components of the specified by data->uri 02185 * and stores the information it parses into 'data'. 02186 * 02187 * Returns TRUE if it successfully parsed the URI. False otherwise. 
02188 */ 02189 static BOOL parse_uri(parse_data *data, DWORD flags) { 02190 const WCHAR *ptr; 02191 const WCHAR **pptr; 02192 02193 ptr = data->uri; 02194 pptr = &ptr; 02195 02196 TRACE("(%p %x): BEGINNING TO PARSE URI %s.\n", data, flags, debugstr_w(data->uri)); 02197 02198 if(!parse_scheme(pptr, data, flags, 0)) 02199 return FALSE; 02200 02201 if(!parse_hierpart(pptr, data, flags)) 02202 return FALSE; 02203 02204 if(!parse_query(pptr, data, flags)) 02205 return FALSE; 02206 02207 if(!parse_fragment(pptr, data, flags)) 02208 return FALSE; 02209 02210 TRACE("(%p %x): FINISHED PARSING URI.\n", data, flags); 02211 return TRUE; 02212 } 02213 02214 static BOOL canonicalize_username(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) { 02215 const WCHAR *ptr; 02216 02217 if(!data->username) { 02218 uri->userinfo_start = -1; 02219 return TRUE; 02220 } 02221 02222 uri->userinfo_start = uri->canon_len; 02223 for(ptr = data->username; ptr < data->username+data->username_len; ++ptr) { 02224 if(*ptr == '%') { 02225 /* Only decode % encoded values for known scheme types. */ 02226 if(data->scheme_type != URL_SCHEME_UNKNOWN) { 02227 /* See if the value really needs decoded. */ 02228 WCHAR val = decode_pct_val(ptr); 02229 if(is_unreserved(val)) { 02230 if(!computeOnly) 02231 uri->canon_uri[uri->canon_len] = val; 02232 02233 ++uri->canon_len; 02234 02235 /* Move pass the hex characters. */ 02236 ptr += 2; 02237 continue; 02238 } 02239 } 02240 } else if(!is_reserved(*ptr) && !is_unreserved(*ptr) && *ptr != '\\') { 02241 /* Only percent encode forbidden characters if the NO_ENCODE_FORBIDDEN_CHARACTERS flag 02242 * is NOT set. 02243 */ 02244 if(!(flags & Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS)) { 02245 if(!computeOnly) 02246 pct_encode_val(*ptr, uri->canon_uri + uri->canon_len); 02247 02248 uri->canon_len += 3; 02249 continue; 02250 } 02251 } 02252 02253 if(!computeOnly) 02254 /* Nothing special, so just copy the character over. 
*/ 02255 uri->canon_uri[uri->canon_len] = *ptr; 02256 ++uri->canon_len; 02257 } 02258 02259 return TRUE; 02260 } 02261 02262 static BOOL canonicalize_password(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) { 02263 const WCHAR *ptr; 02264 02265 if(!data->password) { 02266 uri->userinfo_split = -1; 02267 return TRUE; 02268 } 02269 02270 if(uri->userinfo_start == -1) 02271 /* Has a password, but, doesn't have a username. */ 02272 uri->userinfo_start = uri->canon_len; 02273 02274 uri->userinfo_split = uri->canon_len - uri->userinfo_start; 02275 02276 /* Add the ':' to the userinfo component. */ 02277 if(!computeOnly) 02278 uri->canon_uri[uri->canon_len] = ':'; 02279 ++uri->canon_len; 02280 02281 for(ptr = data->password; ptr < data->password+data->password_len; ++ptr) { 02282 if(*ptr == '%') { 02283 /* Only decode % encoded values for known scheme types. */ 02284 if(data->scheme_type != URL_SCHEME_UNKNOWN) { 02285 /* See if the value really needs decoded. */ 02286 WCHAR val = decode_pct_val(ptr); 02287 if(is_unreserved(val)) { 02288 if(!computeOnly) 02289 uri->canon_uri[uri->canon_len] = val; 02290 02291 ++uri->canon_len; 02292 02293 /* Move pass the hex characters. */ 02294 ptr += 2; 02295 continue; 02296 } 02297 } 02298 } else if(!is_reserved(*ptr) && !is_unreserved(*ptr) && *ptr != '\\') { 02299 /* Only percent encode forbidden characters if the NO_ENCODE_FORBIDDEN_CHARACTERS flag 02300 * is NOT set. 02301 */ 02302 if(!(flags & Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS)) { 02303 if(!computeOnly) 02304 pct_encode_val(*ptr, uri->canon_uri + uri->canon_len); 02305 02306 uri->canon_len += 3; 02307 continue; 02308 } 02309 } 02310 02311 if(!computeOnly) 02312 /* Nothing special, so just copy the character over. */ 02313 uri->canon_uri[uri->canon_len] = *ptr; 02314 ++uri->canon_len; 02315 } 02316 02317 return TRUE; 02318 } 02319 02320 /* Canonicalizes the userinfo of the URI represented by the parse_data. 
02321 * 02322 * Canonicalization of the userinfo is a simple process. If there are any percent 02323 * encoded characters that fall in the "unreserved" character set, they are decoded 02324 * to their actual value. If a character is not in the "unreserved" or "reserved" sets 02325 * then it is percent encoded. Other than that the characters are copied over without 02326 * change. 02327 */ 02328 static BOOL canonicalize_userinfo(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) { 02329 uri->userinfo_start = uri->userinfo_split = -1; 02330 uri->userinfo_len = 0; 02331 02332 if(!data->username && !data->password) 02333 /* URI doesn't have userinfo, so nothing to do here. */ 02334 return TRUE; 02335 02336 if(!canonicalize_username(data, uri, flags, computeOnly)) 02337 return FALSE; 02338 02339 if(!canonicalize_password(data, uri, flags, computeOnly)) 02340 return FALSE; 02341 02342 uri->userinfo_len = uri->canon_len - uri->userinfo_start; 02343 if(!computeOnly) 02344 TRACE("(%p %p %x %d): Canonicalized userinfo, userinfo_start=%d, userinfo=%s, userinfo_split=%d userinfo_len=%d.\n", 02345 data, uri, flags, computeOnly, uri->userinfo_start, debugstr_wn(uri->canon_uri + uri->userinfo_start, uri->userinfo_len), 02346 uri->userinfo_split, uri->userinfo_len); 02347 02348 /* Now insert the '@' after the userinfo. */ 02349 if(!computeOnly) 02350 uri->canon_uri[uri->canon_len] = '@'; 02351 ++uri->canon_len; 02352 02353 return TRUE; 02354 } 02355 02356 /* Attempts to canonicalize a reg_name. 02357 * 02358 * Things that happen: 02359 * 1) If Uri_CREATE_NO_CANONICALIZE flag is not set, then the reg_name is 02360 * lower cased. Unless it's an unknown scheme type, which case it's 02361 * no lower cased reguardless. 02362 * 02363 * 2) Unreserved % encoded characters are decoded for known 02364 * scheme types. 
02365 * 02366 * 3) Forbidden characters are % encoded as long as 02367 * Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS flag is not set and 02368 * it isn't an unknown scheme type. 02369 * 02370 * 4) If it's a file scheme and the host is "localhost" it's removed. 02371 * 02372 * 5) If it's a file scheme and Uri_CREATE_FILE_USE_DOS_PATH is set, 02373 * then the UNC path characters are added before the host name. 02374 */ 02375 static BOOL canonicalize_reg_name(const parse_data *data, Uri *uri, 02376 DWORD flags, BOOL computeOnly) { 02377 static const WCHAR localhostW[] = 02378 {'l','o','c','a','l','h','o','s','t',0}; 02379 const WCHAR *ptr; 02380 const BOOL known_scheme = data->scheme_type != URL_SCHEME_UNKNOWN; 02381 02382 if(data->scheme_type == URL_SCHEME_FILE && 02383 data->host_len == lstrlenW(localhostW)) { 02384 if(!StrCmpNIW(data->host, localhostW, data->host_len)) { 02385 uri->host_start = -1; 02386 uri->host_len = 0; 02387 uri->host_type = Uri_HOST_UNKNOWN; 02388 return TRUE; 02389 } 02390 } 02391 02392 if(data->scheme_type == URL_SCHEME_FILE && flags & Uri_CREATE_FILE_USE_DOS_PATH) { 02393 if(!computeOnly) { 02394 uri->canon_uri[uri->canon_len] = '\\'; 02395 uri->canon_uri[uri->canon_len+1] = '\\'; 02396 } 02397 uri->canon_len += 2; 02398 uri->authority_start = uri->canon_len; 02399 } 02400 02401 uri->host_start = uri->canon_len; 02402 02403 for(ptr = data->host; ptr < data->host+data->host_len; ++ptr) { 02404 if(*ptr == '%' && known_scheme) { 02405 WCHAR val = decode_pct_val(ptr); 02406 if(is_unreserved(val)) { 02407 /* If NO_CANONICALZE is not set, then windows lower cases the 02408 * decoded value. 02409 */ 02410 if(!(flags & Uri_CREATE_NO_CANONICALIZE) && isupperW(val)) { 02411 if(!computeOnly) 02412 uri->canon_uri[uri->canon_len] = tolowerW(val); 02413 } else { 02414 if(!computeOnly) 02415 uri->canon_uri[uri->canon_len] = val; 02416 } 02417 ++uri->canon_len; 02418 02419 /* Skip past the % encoded character. 
*/ 02420 ptr += 2; 02421 continue; 02422 } else { 02423 /* Just copy the % over. */ 02424 if(!computeOnly) 02425 uri->canon_uri[uri->canon_len] = *ptr; 02426 ++uri->canon_len; 02427 } 02428 } else if(*ptr == '\\') { 02429 /* Only unknown scheme types could have made it here with a '\\' in the host name. */ 02430 if(!computeOnly) 02431 uri->canon_uri[uri->canon_len] = *ptr; 02432 ++uri->canon_len; 02433 } else if(!(flags & Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS) && 02434 !is_unreserved(*ptr) && !is_reserved(*ptr) && known_scheme) { 02435 if(!computeOnly) { 02436 pct_encode_val(*ptr, uri->canon_uri+uri->canon_len); 02437 02438 /* The percent encoded value gets lower cased also. */ 02439 if(!(flags & Uri_CREATE_NO_CANONICALIZE)) { 02440 uri->canon_uri[uri->canon_len+1] = tolowerW(uri->canon_uri[uri->canon_len+1]); 02441 uri->canon_uri[uri->canon_len+2] = tolowerW(uri->canon_uri[uri->canon_len+2]); 02442 } 02443 } 02444 02445 uri->canon_len += 3; 02446 } else { 02447 if(!computeOnly) { 02448 if(!(flags & Uri_CREATE_NO_CANONICALIZE) && known_scheme) 02449 uri->canon_uri[uri->canon_len] = tolowerW(*ptr); 02450 else 02451 uri->canon_uri[uri->canon_len] = *ptr; 02452 } 02453 02454 ++uri->canon_len; 02455 } 02456 } 02457 02458 uri->host_len = uri->canon_len - uri->host_start; 02459 02460 if(!computeOnly) 02461 TRACE("(%p %p %x %d): Canonicalize reg_name=%s len=%d\n", data, uri, flags, 02462 computeOnly, debugstr_wn(uri->canon_uri+uri->host_start, uri->host_len), 02463 uri->host_len); 02464 02465 if(!computeOnly) 02466 find_domain_name(uri->canon_uri+uri->host_start, uri->host_len, 02467 &(uri->domain_offset)); 02468 02469 return TRUE; 02470 } 02471 02472 /* Attempts to canonicalize an implicit IPv4 address. 
*/ 02473 static BOOL canonicalize_implicit_ipv4address(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) { 02474 uri->host_start = uri->canon_len; 02475 02476 TRACE("%u\n", data->implicit_ipv4); 02477 /* For unknown scheme types Window's doesn't convert 02478 * the value into an IP address, but, it still considers 02479 * it an IPv4 address. 02480 */ 02481 if(data->scheme_type == URL_SCHEME_UNKNOWN) { 02482 if(!computeOnly) 02483 memcpy(uri->canon_uri+uri->canon_len, data->host, data->host_len*sizeof(WCHAR)); 02484 uri->canon_len += data->host_len; 02485 } else { 02486 if(!computeOnly) 02487 uri->canon_len += ui2ipv4(uri->canon_uri+uri->canon_len, data->implicit_ipv4); 02488 else 02489 uri->canon_len += ui2ipv4(NULL, data->implicit_ipv4); 02490 } 02491 02492 uri->host_len = uri->canon_len - uri->host_start; 02493 uri->host_type = Uri_HOST_IPV4; 02494 02495 if(!computeOnly) 02496 TRACE("%p %p %x %d): Canonicalized implicit IP address=%s len=%d\n", 02497 data, uri, flags, computeOnly, 02498 debugstr_wn(uri->canon_uri+uri->host_start, uri->host_len), 02499 uri->host_len); 02500 02501 return TRUE; 02502 } 02503 02504 /* Attempts to canonicalize an IPv4 address. 02505 * 02506 * If the parse_data represents a URI that has an implicit IPv4 address 02507 * (ex. http://256/, this function will convert 256 into 0.0.1.0). If 02508 * the implicit IP address exceeds the value of UINT_MAX (maximum value 02509 * for an IPv4 address) it's canonicalized as if were a reg-name. 02510 * 02511 * If the parse_data contains a partial or full IPv4 address it normalizes it. 02512 * A partial IPv4 address is something like "192.0" and would be normalized to 02513 * "192.0.0.0". With a full (or partial) IPv4 address like "192.002.01.003" would 02514 * be normalized to "192.2.1.3". 02515 * 02516 * NOTES: 02517 * Window's ONLY normalizes IPv4 address for known scheme types (one that isn't 02518 * URL_SCHEME_UNKNOWN). 
For unknown scheme types, it simply copies the data from 02519 * the original URI into the canonicalized URI, but, it still recognizes URI's 02520 * host type as HOST_IPV4. 02521 */ 02522 static BOOL canonicalize_ipv4address(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) { 02523 if(data->has_implicit_ip) 02524 return canonicalize_implicit_ipv4address(data, uri, flags, computeOnly); 02525 else { 02526 uri->host_start = uri->canon_len; 02527 02528 /* Windows only normalizes for known scheme types. */ 02529 if(data->scheme_type != URL_SCHEME_UNKNOWN) { 02530 /* parse_data contains a partial or full IPv4 address, so normalize it. */ 02531 DWORD i, octetDigitCount = 0, octetCount = 0; 02532 BOOL octetHasDigit = FALSE; 02533 02534 for(i = 0; i < data->host_len; ++i) { 02535 if(data->host[i] == '0' && !octetHasDigit) { 02536 /* Can ignore leading zeros if: 02537 * 1) It isn't the last digit of the octet. 02538 * 2) i+1 != data->host_len 02539 * 3) i+1 != '.' 02540 */ 02541 if(octetDigitCount == 2 || 02542 i+1 == data->host_len || 02543 data->host[i+1] == '.') { 02544 if(!computeOnly) 02545 uri->canon_uri[uri->canon_len] = data->host[i]; 02546 ++uri->canon_len; 02547 TRACE("Adding zero\n"); 02548 } 02549 } else if(data->host[i] == '.') { 02550 if(!computeOnly) 02551 uri->canon_uri[uri->canon_len] = data->host[i]; 02552 ++uri->canon_len; 02553 02554 octetDigitCount = 0; 02555 octetHasDigit = FALSE; 02556 ++octetCount; 02557 } else { 02558 if(!computeOnly) 02559 uri->canon_uri[uri->canon_len] = data->host[i]; 02560 ++uri->canon_len; 02561 02562 ++octetDigitCount; 02563 octetHasDigit = TRUE; 02564 } 02565 } 02566 02567 /* Make sure the canonicalized IP address has 4 dec-octets. 
02568 * If doesn't add "0" ones until there is 4; 02569 */ 02570 for( ; octetCount < 3; ++octetCount) { 02571 if(!computeOnly) { 02572 uri->canon_uri[uri->canon_len] = '.'; 02573 uri->canon_uri[uri->canon_len+1] = '0'; 02574 } 02575 02576 uri->canon_len += 2; 02577 } 02578 } else { 02579 /* Windows doesn't normalize addresses in unknown schemes. */ 02580 if(!computeOnly) 02581 memcpy(uri->canon_uri+uri->canon_len, data->host, data->host_len*sizeof(WCHAR)); 02582 uri->canon_len += data->host_len; 02583 } 02584 02585 uri->host_len = uri->canon_len - uri->host_start; 02586 if(!computeOnly) 02587 TRACE("(%p %p %x %d): Canonicalized IPv4 address, ip=%s len=%d\n", 02588 data, uri, flags, computeOnly, 02589 debugstr_wn(uri->canon_uri+uri->host_start, uri->host_len), 02590 uri->host_len); 02591 } 02592 02593 return TRUE; 02594 } 02595 02596 /* Attempts to canonicalize the IPv6 address of the URI. 02597 * 02598 * Multiple things happen during the canonicalization of an IPv6 address: 02599 * 1) Any leading zero's in an h16 component are removed. 02600 * Ex: [0001:0022::] -> [1:22::] 02601 * 02602 * 2) The longest sequence of zero h16 components are compressed 02603 * into a "::" (elision). If there's a tie, the first is choosen. 02604 * 02605 * Ex: [0:0:0:0:1:6:7:8] -> [::1:6:7:8] 02606 * [0:0:0:0:1:2::] -> [::1:2:0:0] 02607 * [0:0:1:2:0:0:7:8] -> [::1:2:0:0:7:8] 02608 * 02609 * 3) If an IPv4 address is attached to the IPv6 address, it's 02610 * also normalized. 02611 * Ex: [::001.002.022.000] -> [::1.2.22.0] 02612 * 02613 * 4) If an elision is present, but, only represents 1 h16 component 02614 * it's expanded. 02615 * 02616 * Ex: [1::2:3:4:5:6:7] -> [1:0:2:3:4:5:6:7] 02617 * 02618 * 5) If the IPv6 address contains an IPv4 address and there exists 02619 * at least 1 non-zero h16 component the IPv4 address is converted 02620 * into two h16 components, otherwise it's normalized and kept as is. 
02621 * 02622 * Ex: [::192.200.003.4] -> [::192.200.3.4] 02623 * [ffff::192.200.003.4] -> [ffff::c0c8:3041] 02624 * 02625 * NOTE: 02626 * For unknown scheme types Windows simply copies the address over without any 02627 * changes. 02628 * 02629 * IPv4 address can be included in an elision if all its components are 0's. 02630 */ 02631 static BOOL canonicalize_ipv6address(const parse_data *data, Uri *uri, 02632 DWORD flags, BOOL computeOnly) { 02633 uri->host_start = uri->canon_len; 02634 02635 if(data->scheme_type == URL_SCHEME_UNKNOWN) { 02636 if(!computeOnly) 02637 memcpy(uri->canon_uri+uri->canon_len, data->host, data->host_len*sizeof(WCHAR)); 02638 uri->canon_len += data->host_len; 02639 } else { 02640 USHORT values[8]; 02641 INT elision_start; 02642 DWORD i, elision_len; 02643 02644 if(!ipv6_to_number(&(data->ipv6_address), values)) { 02645 TRACE("(%p %p %x %d): Failed to compute numerical value for IPv6 address.\n", 02646 data, uri, flags, computeOnly); 02647 return FALSE; 02648 } 02649 02650 if(!computeOnly) 02651 uri->canon_uri[uri->canon_len] = '['; 02652 ++uri->canon_len; 02653 02654 /* Find where the elision should occur (if any). */ 02655 compute_elision_location(&(data->ipv6_address), values, &elision_start, &elision_len); 02656 02657 TRACE("%p %p %x %d): Elision starts at %d, len=%u\n", data, uri, flags, 02658 computeOnly, elision_start, elision_len); 02659 02660 for(i = 0; i < 8; ++i) { 02661 BOOL in_elision = (elision_start > -1 && i >= elision_start && 02662 i < elision_start+elision_len); 02663 BOOL do_ipv4 = (i == 6 && data->ipv6_address.ipv4 && !in_elision && 02664 data->ipv6_address.h16_count == 0); 02665 02666 if(i == elision_start) { 02667 if(!computeOnly) { 02668 uri->canon_uri[uri->canon_len] = ':'; 02669 uri->canon_uri[uri->canon_len+1] = ':'; 02670 } 02671 uri->canon_len += 2; 02672 } 02673 02674 /* We can ignore the current component if we're in the elision. 
*/ 02675 if(in_elision) 02676 continue; 02677 02678 /* We only add a ':' if we're not at i == 0, or when we're at 02679 * the very end of elision range since the ':' colon was handled 02680 * earlier. Otherwise we would end up with ":::" after elision. 02681 */ 02682 if(i != 0 && !(elision_start > -1 && i == elision_start+elision_len)) { 02683 if(!computeOnly) 02684 uri->canon_uri[uri->canon_len] = ':'; 02685 ++uri->canon_len; 02686 } 02687 02688 if(do_ipv4) { 02689 UINT val; 02690 DWORD len; 02691 02692 /* Combine the two parts of the IPv4 address values. */ 02693 val = values[i]; 02694 val <<= 16; 02695 val += values[i+1]; 02696 02697 if(!computeOnly) 02698 len = ui2ipv4(uri->canon_uri+uri->canon_len, val); 02699 else 02700 len = ui2ipv4(NULL, val); 02701 02702 uri->canon_len += len; 02703 ++i; 02704 } else { 02705 /* Write a regular h16 component to the URI. */ 02706 02707 /* Short circuit for the trivial case. */ 02708 if(values[i] == 0) { 02709 if(!computeOnly) 02710 uri->canon_uri[uri->canon_len] = '0'; 02711 ++uri->canon_len; 02712 } else { 02713 static const WCHAR formatW[] = {'%','x',0}; 02714 02715 if(!computeOnly) 02716 uri->canon_len += sprintfW(uri->canon_uri+uri->canon_len, 02717 formatW, values[i]); 02718 else { 02719 WCHAR tmp[5]; 02720 uri->canon_len += sprintfW(tmp, formatW, values[i]); 02721 } 02722 } 02723 } 02724 } 02725 02726 /* Add the closing ']'. */ 02727 if(!computeOnly) 02728 uri->canon_uri[uri->canon_len] = ']'; 02729 ++uri->canon_len; 02730 } 02731 02732 uri->host_len = uri->canon_len - uri->host_start; 02733 02734 if(!computeOnly) 02735 TRACE("(%p %p %x %d): Canonicalized IPv6 address %s, len=%d\n", data, uri, flags, 02736 computeOnly, debugstr_wn(uri->canon_uri+uri->host_start, uri->host_len), 02737 uri->host_len); 02738 02739 return TRUE; 02740 } 02741 02742 /* Attempts to canonicalize the host of the URI (if any). 
*/ 02743 static BOOL canonicalize_host(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) { 02744 uri->host_start = -1; 02745 uri->host_len = 0; 02746 uri->domain_offset = -1; 02747 02748 if(data->host) { 02749 switch(data->host_type) { 02750 case Uri_HOST_DNS: 02751 uri->host_type = Uri_HOST_DNS; 02752 if(!canonicalize_reg_name(data, uri, flags, computeOnly)) 02753 return FALSE; 02754 02755 break; 02756 case Uri_HOST_IPV4: 02757 uri->host_type = Uri_HOST_IPV4; 02758 if(!canonicalize_ipv4address(data, uri, flags, computeOnly)) 02759 return FALSE; 02760 02761 break; 02762 case Uri_HOST_IPV6: 02763 if(!canonicalize_ipv6address(data, uri, flags, computeOnly)) 02764 return FALSE; 02765 02766 uri->host_type = Uri_HOST_IPV6; 02767 break; 02768 case Uri_HOST_UNKNOWN: 02769 if(data->host_len > 0 || data->scheme_type != URL_SCHEME_FILE) { 02770 uri->host_start = uri->canon_len; 02771 02772 /* Nothing happens to unknown host types. */ 02773 if(!computeOnly) 02774 memcpy(uri->canon_uri+uri->canon_len, data->host, data->host_len*sizeof(WCHAR)); 02775 uri->canon_len += data->host_len; 02776 uri->host_len = data->host_len; 02777 } 02778 02779 uri->host_type = Uri_HOST_UNKNOWN; 02780 break; 02781 default: 02782 FIXME("(%p %p %x %d): Canonicalization for host type %d not supported.\n", data, 02783 uri, flags, computeOnly, data->host_type); 02784 return FALSE; 02785 } 02786 } 02787 02788 return TRUE; 02789 } 02790 02791 static BOOL canonicalize_port(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) { 02792 BOOL has_default_port = FALSE; 02793 USHORT default_port = 0; 02794 DWORD i; 02795 02796 uri->port_offset = -1; 02797 02798 /* Check if the scheme has a default port. 
*/ 02799 for(i = 0; i < sizeof(default_ports)/sizeof(default_ports[0]); ++i) { 02800 if(default_ports[i].scheme == data->scheme_type) { 02801 has_default_port = TRUE; 02802 default_port = default_ports[i].port; 02803 break; 02804 } 02805 } 02806 02807 uri->has_port = data->has_port || has_default_port; 02808 02809 /* Possible cases: 02810 * 1) Has a port which is the default port. 02811 * 2) Has a port (not the default). 02812 * 3) Doesn't have a port, but, scheme has a default port. 02813 * 4) No port. 02814 */ 02815 if(has_default_port && data->has_port && data->port_value == default_port) { 02816 /* If it's the default port and this flag isn't set, don't do anything. */ 02817 if(flags & Uri_CREATE_NO_CANONICALIZE) { 02818 uri->port_offset = uri->canon_len-uri->authority_start; 02819 if(!computeOnly) 02820 uri->canon_uri[uri->canon_len] = ':'; 02821 ++uri->canon_len; 02822 02823 if(data->port) { 02824 /* Copy the original port over. */ 02825 if(!computeOnly) 02826 memcpy(uri->canon_uri+uri->canon_len, data->port, data->port_len*sizeof(WCHAR)); 02827 uri->canon_len += data->port_len; 02828 } else { 02829 if(!computeOnly) 02830 uri->canon_len += ui2str(uri->canon_uri+uri->canon_len, data->port_value); 02831 else 02832 uri->canon_len += ui2str(NULL, data->port_value); 02833 } 02834 } 02835 02836 uri->port = default_port; 02837 } else if(data->has_port) { 02838 uri->port_offset = uri->canon_len-uri->authority_start; 02839 if(!computeOnly) 02840 uri->canon_uri[uri->canon_len] = ':'; 02841 ++uri->canon_len; 02842 02843 if(flags & Uri_CREATE_NO_CANONICALIZE && data->port) { 02844 /* Copy the original over without changes. 
*/ 02845 if(!computeOnly) 02846 memcpy(uri->canon_uri+uri->canon_len, data->port, data->port_len*sizeof(WCHAR)); 02847 uri->canon_len += data->port_len; 02848 } else { 02849 if(!computeOnly) 02850 uri->canon_len += ui2str(uri->canon_uri+uri->canon_len, data->port_value); 02851 else 02852 uri->canon_len += ui2str(NULL, data->port_value); 02853 } 02854 02855 uri->port = data->port_value; 02856 } else if(has_default_port) 02857 uri->port = default_port; 02858 02859 return TRUE; 02860 } 02861 02862 /* Canonicalizes the authority of the URI represented by the parse_data. */ 02863 static BOOL canonicalize_authority(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) { 02864 uri->authority_start = uri->canon_len; 02865 uri->authority_len = 0; 02866 02867 if(!canonicalize_userinfo(data, uri, flags, computeOnly)) 02868 return FALSE; 02869 02870 if(!canonicalize_host(data, uri, flags, computeOnly)) 02871 return FALSE; 02872 02873 if(!canonicalize_port(data, uri, flags, computeOnly)) 02874 return FALSE; 02875 02876 if(uri->host_start != -1 || (data->is_relative && (data->password || data->username))) 02877 uri->authority_len = uri->canon_len - uri->authority_start; 02878 else 02879 uri->authority_start = -1; 02880 02881 return TRUE; 02882 } 02883 02884 /* Attempts to canonicalize the path of a hierarchical URI. 02885 * 02886 * Things that happen: 02887 * 1). Forbidden characters are percent encoded, unless the NO_ENCODE_FORBIDDEN 02888 * flag is set or it's a file URI. Forbidden characters are always encoded 02889 * for file schemes reguardless and forbidden characters are never encoded 02890 * for unknown scheme types. 02891 * 02892 * 2). For known scheme types '\\' are changed to '/'. 02893 * 02894 * 3). Percent encoded, unreserved characters are decoded to their actual values. 02895 * Unless the scheme type is unknown. For file schemes any percent encoded 02896 * character in the unreserved or reserved set is decoded. 02897 * 02898 * 4). 
For File schemes if the path is starts with a drive letter and doesn't 02899 * start with a '/' then one is appended. 02900 * Ex: file://c:/test.mp3 -> file:///c:/test.mp3 02901 * 02902 * 5). Dot segments are removed from the path for all scheme types 02903 * unless NO_CANONICALIZE flag is set. Dot segments aren't removed 02904 * for wildcard scheme types. 02905 * 02906 * NOTES: 02907 * file://c:/test%20test -> file:///c:/test%2520test 02908 * file://c:/test%3Etest -> file:///c:/test%253Etest 02909 * if Uri_CREATE_FILE_USE_DOS_PATH is not set: 02910 * file:///c:/test%20test -> file:///c:/test%20test 02911 * file:///c:/test%test -> file:///c:/test%25test 02912 */ 02913 static BOOL canonicalize_path_hierarchical(const parse_data *data, Uri *uri, 02914 DWORD flags, BOOL computeOnly) { 02915 const WCHAR *ptr; 02916 const BOOL known_scheme = data->scheme_type != URL_SCHEME_UNKNOWN; 02917 const BOOL is_file = data->scheme_type == URL_SCHEME_FILE; 02918 const BOOL is_res = data->scheme_type == URL_SCHEME_RES; 02919 02920 BOOL escape_pct = FALSE; 02921 02922 if(!data->path) { 02923 uri->path_start = -1; 02924 uri->path_len = 0; 02925 return TRUE; 02926 } 02927 02928 uri->path_start = uri->canon_len; 02929 ptr = data->path; 02930 02931 if(is_file && uri->host_start == -1) { 02932 /* Check if a '/' needs to be appended for the file scheme. */ 02933 if(data->path_len > 1 && is_drive_path(ptr) && !(flags & Uri_CREATE_FILE_USE_DOS_PATH)) { 02934 if(!computeOnly) 02935 uri->canon_uri[uri->canon_len] = '/'; 02936 uri->canon_len++; 02937 escape_pct = TRUE; 02938 } else if(*ptr == '/') { 02939 if(!(flags & Uri_CREATE_FILE_USE_DOS_PATH)) { 02940 /* Copy the extra '/' over. */ 02941 if(!computeOnly) 02942 uri->canon_uri[uri->canon_len] = '/'; 02943 ++uri->canon_len; 02944 } 02945 ++ptr; 02946 } 02947 02948 if(is_drive_path(ptr)) { 02949 if(!computeOnly) { 02950 uri->canon_uri[uri->canon_len] = *ptr; 02951 /* If theres a '|' after the drive letter, convert it to a ':'. 
*/ 02952 uri->canon_uri[uri->canon_len+1] = ':'; 02953 } 02954 ptr += 2; 02955 uri->canon_len += 2; 02956 } 02957 } 02958 02959 if(!is_file && *(data->path) && *(data->path) != '/') { 02960 /* Prepend a '/' to the path if it doesn't have one. */ 02961 if(!computeOnly) 02962 uri->canon_uri[uri->canon_len] = '/'; 02963 ++uri->canon_len; 02964 } 02965 02966 for(; ptr < data->path+data->path_len; ++ptr) { 02967 BOOL do_default_action = TRUE; 02968 02969 if(*ptr == '%' && !is_res) { 02970 const WCHAR *tmp = ptr; 02971 WCHAR val; 02972 02973 /* Check if the % represents a valid encoded char, or if it needs encoded. */ 02974 BOOL force_encode = !check_pct_encoded(&tmp) && is_file && !(flags&Uri_CREATE_FILE_USE_DOS_PATH); 02975 val = decode_pct_val(ptr); 02976 02977 if(force_encode || escape_pct) { 02978 /* Escape the percent sign in the file URI. */ 02979 if(!computeOnly) 02980 pct_encode_val(*ptr, uri->canon_uri+uri->canon_len); 02981 uri->canon_len += 3; 02982 do_default_action = FALSE; 02983 } else if((is_unreserved(val) && known_scheme) || 02984 (is_file && (is_unreserved(val) || is_reserved(val) || 02985 (val && flags&Uri_CREATE_FILE_USE_DOS_PATH && !is_forbidden_dos_path_char(val))))) { 02986 if(!computeOnly) 02987 uri->canon_uri[uri->canon_len] = val; 02988 ++uri->canon_len; 02989 02990 ptr += 2; 02991 continue; 02992 } 02993 } else if(*ptr == '/' && is_file && (flags & Uri_CREATE_FILE_USE_DOS_PATH)) { 02994 /* Convert the '/' back to a '\\'. */ 02995 if(!computeOnly) 02996 uri->canon_uri[uri->canon_len] = '\\'; 02997 ++uri->canon_len; 02998 do_default_action = FALSE; 02999 } else if(*ptr == '\\' && known_scheme) { 03000 if(!(is_file && (flags & Uri_CREATE_FILE_USE_DOS_PATH))) { 03001 /* Convert '\\' into a '/'. 
*/ 03002 if(!computeOnly) 03003 uri->canon_uri[uri->canon_len] = '/'; 03004 ++uri->canon_len; 03005 do_default_action = FALSE; 03006 } 03007 } else if(known_scheme && !is_res && !is_unreserved(*ptr) && !is_reserved(*ptr) && 03008 (!(flags & Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS) || is_file)) { 03009 if(!(is_file && (flags & Uri_CREATE_FILE_USE_DOS_PATH))) { 03010 /* Escape the forbidden character. */ 03011 if(!computeOnly) 03012 pct_encode_val(*ptr, uri->canon_uri+uri->canon_len); 03013 uri->canon_len += 3; 03014 do_default_action = FALSE; 03015 } 03016 } 03017 03018 if(do_default_action) { 03019 if(!computeOnly) 03020 uri->canon_uri[uri->canon_len] = *ptr; 03021 ++uri->canon_len; 03022 } 03023 } 03024 03025 uri->path_len = uri->canon_len - uri->path_start; 03026 03027 /* Removing the dot segments only happens when it's not in 03028 * computeOnly mode and it's not a wildcard scheme. File schemes 03029 * with USE_DOS_PATH set don't get dot segments removed. 03030 */ 03031 if(!(is_file && (flags & Uri_CREATE_FILE_USE_DOS_PATH)) && 03032 data->scheme_type != URL_SCHEME_WILDCARD) { 03033 if(!(flags & Uri_CREATE_NO_CANONICALIZE) && !computeOnly) { 03034 /* Remove the dot segments (if any) and reset everything to the new 03035 * correct length. 03036 */ 03037 DWORD new_len = remove_dot_segments(uri->canon_uri+uri->path_start, uri->path_len); 03038 uri->canon_len -= uri->path_len-new_len; 03039 uri->path_len = new_len; 03040 } 03041 } 03042 03043 if(!computeOnly) 03044 TRACE("Canonicalized path %s len=%d\n", 03045 debugstr_wn(uri->canon_uri+uri->path_start, uri->path_len), 03046 uri->path_len); 03047 03048 return TRUE; 03049 } 03050 03051 /* Attempts to canonicalize the path for an opaque URI. 03052 * 03053 * For known scheme types: 03054 * 1) forbidden characters are percent encoded if 03055 * NO_ENCODE_FORBIDDEN_CHARACTERS isn't set. 03056 * 03057 * 2) Percent encoded, unreserved characters are decoded 03058 * to their actual values, for known scheme types. 
03059 * 03060 * 3) '\\' are changed to '/' for known scheme types 03061 * except for mailto schemes. 03062 * 03063 * 4) For file schemes, if USE_DOS_PATH is set all '/' 03064 * are converted to backslashes. 03065 * 03066 * 5) For file schemes, if USE_DOS_PATH isn't set all '\' 03067 * are converted to forward slashes. 03068 */ 03069 static BOOL canonicalize_path_opaque(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) { 03070 const WCHAR *ptr; 03071 const BOOL known_scheme = data->scheme_type != URL_SCHEME_UNKNOWN; 03072 const BOOL is_file = data->scheme_type == URL_SCHEME_FILE; 03073 03074 if(!data->path) { 03075 uri->path_start = -1; 03076 uri->path_len = 0; 03077 return TRUE; 03078 } 03079 03080 uri->path_start = uri->canon_len; 03081 03082 /* Windows doesn't allow a "//" to appear after the scheme 03083 * of a URI, if it's an opaque URI. 03084 */ 03085 if(data->scheme && *(data->path) == '/' && *(data->path+1) == '/') { 03086 /* So it inserts a "/." before the "//" if it exists. */ 03087 if(!computeOnly) { 03088 uri->canon_uri[uri->canon_len] = '/'; 03089 uri->canon_uri[uri->canon_len+1] = '.'; 03090 } 03091 03092 uri->canon_len += 2; 03093 } 03094 03095 for(ptr = data->path; ptr < data->path+data->path_len; ++ptr) { 03096 BOOL do_default_action = TRUE; 03097 03098 if(*ptr == '%' && known_scheme) { 03099 WCHAR val = decode_pct_val(ptr); 03100 03101 if(is_unreserved(val)) { 03102 if(!computeOnly) 03103 uri->canon_uri[uri->canon_len] = val; 03104 ++uri->canon_len; 03105 03106 ptr += 2; 03107 continue; 03108 } 03109 } else if(*ptr == '/' && is_file && (flags & Uri_CREATE_FILE_USE_DOS_PATH)) { 03110 if(!computeOnly) 03111 uri->canon_uri[uri->canon_len] = '\\'; 03112 ++uri->canon_len; 03113 do_default_action = FALSE; 03114 } else if(*ptr == '\\') { 03115 if(is_file && !(flags & Uri_CREATE_FILE_USE_DOS_PATH)) { 03116 /* Convert to a '/'. 
*/ 03117 if(!computeOnly) 03118 uri->canon_uri[uri->canon_len] = '/'; 03119 ++uri->canon_len; 03120 do_default_action = FALSE; 03121 } 03122 } else if(known_scheme && !is_unreserved(*ptr) && !is_reserved(*ptr) && 03123 !(flags & Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS)) { 03124 if(!(is_file && (flags & Uri_CREATE_FILE_USE_DOS_PATH))) { 03125 if(!computeOnly) 03126 pct_encode_val(*ptr, uri->canon_uri+uri->canon_len); 03127 uri->canon_len += 3; 03128 do_default_action = FALSE; 03129 } 03130 } 03131 03132 if(do_default_action) { 03133 if(!computeOnly) 03134 uri->canon_uri[uri->canon_len] = *ptr; 03135 ++uri->canon_len; 03136 } 03137 } 03138 03139 if(data->scheme_type == URL_SCHEME_MK && !computeOnly && !(flags & Uri_CREATE_NO_CANONICALIZE)) { 03140 DWORD new_len = remove_dot_segments(uri->canon_uri + uri->path_start, 03141 uri->canon_len - uri->path_start); 03142 uri->canon_len = uri->path_start + new_len; 03143 } 03144 03145 uri->path_len = uri->canon_len - uri->path_start; 03146 03147 if(!computeOnly) 03148 TRACE("(%p %p %x %d): Canonicalized opaque URI path %s len=%d\n", data, uri, flags, computeOnly, 03149 debugstr_wn(uri->canon_uri+uri->path_start, uri->path_len), uri->path_len); 03150 return TRUE; 03151 } 03152 03153 /* Determines how the URI represented by the parse_data should be canonicalized. 03154 * 03155 * Essentially, if the parse_data represents an hierarchical URI then it calls 03156 * canonicalize_authority and the canonicalization functions for the path. If the 03157 * URI is opaque it canonicalizes the path of the URI. 03158 */ 03159 static BOOL canonicalize_hierpart(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) { 03160 if(!data->is_opaque || (data->is_relative && (data->password || data->username))) { 03161 /* "//" is only added for non-wildcard scheme types. 03162 * 03163 * A "//" is only added to a relative URI if it has a 03164 * host or port component (this only happens if a IUriBuilder 03165 * is generating an IUri). 
03166 */ 03167 if((data->is_relative && (data->host || data->has_port)) || 03168 (!data->is_relative && data->scheme_type != URL_SCHEME_WILDCARD)) { 03169 if(!computeOnly) { 03170 INT pos = uri->canon_len; 03171 03172 uri->canon_uri[pos] = '/'; 03173 uri->canon_uri[pos+1] = '/'; 03174 } 03175 uri->canon_len += 2; 03176 } 03177 03178 if(!canonicalize_authority(data, uri, flags, computeOnly)) 03179 return FALSE; 03180 03181 if(data->is_relative && (data->password || data->username)) { 03182 if(!canonicalize_path_opaque(data, uri, flags, computeOnly)) 03183 return FALSE; 03184 } else { 03185 if(!canonicalize_path_hierarchical(data, uri, flags, computeOnly)) 03186 return FALSE; 03187 } 03188 } else { 03189 /* Opaque URI's don't have an authority. */ 03190 uri->userinfo_start = uri->userinfo_split = -1; 03191 uri->userinfo_len = 0; 03192 uri->host_start = -1; 03193 uri->host_len = 0; 03194 uri->host_type = Uri_HOST_UNKNOWN; 03195 uri->has_port = FALSE; 03196 uri->authority_start = -1; 03197 uri->authority_len = 0; 03198 uri->domain_offset = -1; 03199 uri->port_offset = -1; 03200 03201 if(is_hierarchical_scheme(data->scheme_type)) { 03202 DWORD i; 03203 03204 /* Absolute URIs aren't displayed for known scheme types 03205 * which should be hierarchical URIs. 03206 */ 03207 uri->display_modifiers |= URI_DISPLAY_NO_ABSOLUTE_URI; 03208 03209 /* Windows also sets the port for these (if they have one). */ 03210 for(i = 0; i < sizeof(default_ports)/sizeof(default_ports[0]); ++i) { 03211 if(data->scheme_type == default_ports[i].scheme) { 03212 uri->has_port = TRUE; 03213 uri->port = default_ports[i].port; 03214 break; 03215 } 03216 } 03217 } 03218 03219 if(!canonicalize_path_opaque(data, uri, flags, computeOnly)) 03220 return FALSE; 03221 } 03222 03223 if(uri->path_start > -1 && !computeOnly) 03224 /* Finding file extensions happens for both types of URIs. 
*/ 03225 uri->extension_offset = find_file_extension(uri->canon_uri+uri->path_start, uri->path_len); 03226 else 03227 uri->extension_offset = -1; 03228 03229 return TRUE; 03230 } 03231 03232 /* Attempts to canonicalize the query string of the URI. 03233 * 03234 * Things that happen: 03235 * 1) For known scheme types forbidden characters 03236 * are percent encoded, unless the NO_DECODE_EXTRA_INFO flag is set 03237 * or NO_ENCODE_FORBIDDEN_CHARACTERS is set. 03238 * 03239 * 2) For known scheme types, percent encoded, unreserved characters 03240 * are decoded as long as the NO_DECODE_EXTRA_INFO flag isn't set. 03241 */ 03242 static BOOL canonicalize_query(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) { 03243 const WCHAR *ptr, *end; 03244 const BOOL known_scheme = data->scheme_type != URL_SCHEME_UNKNOWN; 03245 03246 if(!data->query) { 03247 uri->query_start = -1; 03248 uri->query_len = 0; 03249 return TRUE; 03250 } 03251 03252 uri->query_start = uri->canon_len; 03253 03254 end = data->query+data->query_len; 03255 for(ptr = data->query; ptr < end; ++ptr) { 03256 if(*ptr == '%') { 03257 if(known_scheme && !(flags & Uri_CREATE_NO_DECODE_EXTRA_INFO)) { 03258 WCHAR val = decode_pct_val(ptr); 03259 if(is_unreserved(val)) { 03260 if(!computeOnly) 03261 uri->canon_uri[uri->canon_len] = val; 03262 ++uri->canon_len; 03263 03264 ptr += 2; 03265 continue; 03266 } 03267 } 03268 } else if(known_scheme && !is_unreserved(*ptr) && !is_reserved(*ptr)) { 03269 if(!(flags & Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS) && 03270 !(flags & Uri_CREATE_NO_DECODE_EXTRA_INFO)) { 03271 if(!computeOnly) 03272 pct_encode_val(*ptr, uri->canon_uri+uri->canon_len); 03273 uri->canon_len += 3; 03274 continue; 03275 } 03276 } 03277 03278 if(!computeOnly) 03279 uri->canon_uri[uri->canon_len] = *ptr; 03280 ++uri->canon_len; 03281 } 03282 03283 uri->query_len = uri->canon_len - uri->query_start; 03284 03285 if(!computeOnly) 03286 TRACE("(%p %p %x %d): Canonicalized query string %s 
len=%d\n", data, uri, flags, 03287 computeOnly, debugstr_wn(uri->canon_uri+uri->query_start, uri->query_len), 03288 uri->query_len); 03289 return TRUE; 03290 } 03291 03292 static BOOL canonicalize_fragment(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) { 03293 const WCHAR *ptr, *end; 03294 const BOOL known_scheme = data->scheme_type != URL_SCHEME_UNKNOWN; 03295 03296 if(!data->fragment) { 03297 uri->fragment_start = -1; 03298 uri->fragment_len = 0; 03299 return TRUE; 03300 } 03301 03302 uri->fragment_start = uri->canon_len; 03303 03304 end = data->fragment + data->fragment_len; 03305 for(ptr = data->fragment; ptr < end; ++ptr) { 03306 if(*ptr == '%') { 03307 if(known_scheme && !(flags & Uri_CREATE_NO_DECODE_EXTRA_INFO)) { 03308 WCHAR val = decode_pct_val(ptr); 03309 if(is_unreserved(val)) { 03310 if(!computeOnly) 03311 uri->canon_uri[uri->canon_len] = val; 03312 ++uri->canon_len; 03313 03314 ptr += 2; 03315 continue; 03316 } 03317 } 03318 } else if(known_scheme && !is_unreserved(*ptr) && !is_reserved(*ptr)) { 03319 if(!(flags & Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS) && 03320 !(flags & Uri_CREATE_NO_DECODE_EXTRA_INFO)) { 03321 if(!computeOnly) 03322 pct_encode_val(*ptr, uri->canon_uri+uri->canon_len); 03323 uri->canon_len += 3; 03324 continue; 03325 } 03326 } 03327 03328 if(!computeOnly) 03329 uri->canon_uri[uri->canon_len] = *ptr; 03330 ++uri->canon_len; 03331 } 03332 03333 uri->fragment_len = uri->canon_len - uri->fragment_start; 03334 03335 if(!computeOnly) 03336 TRACE("(%p %p %x %d): Canonicalized fragment %s len=%d\n", data, uri, flags, 03337 computeOnly, debugstr_wn(uri->canon_uri+uri->fragment_start, uri->fragment_len), 03338 uri->fragment_len); 03339 return TRUE; 03340 } 03341 03342 /* Canonicalizes the scheme information specified in the parse_data using the specified flags. 
*/ 03343 static BOOL canonicalize_scheme(const parse_data *data, Uri *uri, DWORD flags, BOOL computeOnly) { 03344 uri->scheme_start = -1; 03345 uri->scheme_len = 0; 03346 03347 if(!data->scheme) { 03348 /* The only type of URI that doesn't have to have a scheme is a relative 03349 * URI. 03350 */ 03351 if(!data->is_relative) { 03352 FIXME("(%p %p %x): Unable to determine the scheme type of %s.\n", data, 03353 uri, flags, debugstr_w(data->uri)); 03354 return FALSE; 03355 } 03356 } else { 03357 if(!computeOnly) { 03358 DWORD i; 03359 INT pos = uri->canon_len; 03360 03361 for(i = 0; i < data->scheme_len; ++i) { 03362 /* Scheme name must be lower case after canonicalization. */ 03363 uri->canon_uri[i + pos] = tolowerW(data->scheme[i]); 03364 } 03365 03366 uri->canon_uri[i + pos] = ':'; 03367 uri->scheme_start = pos; 03368 03369 TRACE("(%p %p %x): Canonicalized scheme=%s, len=%d.\n", data, uri, flags, 03370 debugstr_wn(uri->canon_uri, uri->scheme_len), data->scheme_len); 03371 } 03372 03373 /* This happens in both computation modes. */ 03374 uri->canon_len += data->scheme_len + 1; 03375 uri->scheme_len = data->scheme_len; 03376 } 03377 return TRUE; 03378 } 03379 03380 /* Compute's what the length of the URI specified by the parse_data will be 03381 * after canonicalization occurs using the specified flags. 03382 * 03383 * This function will return a non-zero value indicating the length of the canonicalized 03384 * URI, or -1 on error. 
03385 */ 03386 static int compute_canonicalized_length(const parse_data *data, DWORD flags) { 03387 Uri uri; 03388 03389 memset(&uri, 0, sizeof(Uri)); 03390 03391 TRACE("(%p %x): Beginning to compute canonicalized length for URI %s\n", data, flags, 03392 debugstr_w(data->uri)); 03393 03394 if(!canonicalize_scheme(data, &uri, flags, TRUE)) { 03395 ERR("(%p %x): Failed to compute URI scheme length.\n", data, flags); 03396 return -1; 03397 } 03398 03399 if(!canonicalize_hierpart(data, &uri, flags, TRUE)) { 03400 ERR("(%p %x): Failed to compute URI hierpart length.\n", data, flags); 03401 return -1; 03402 } 03403 03404 if(!canonicalize_query(data, &uri, flags, TRUE)) { 03405 ERR("(%p %x): Failed to compute query string length.\n", data, flags); 03406 return -1; 03407 } 03408 03409 if(!canonicalize_fragment(data, &uri, flags, TRUE)) { 03410 ERR("(%p %x): Failed to compute fragment length.\n", data, flags); 03411 return -1; 03412 } 03413 03414 TRACE("(%p %x): Finished computing canonicalized URI length. length=%d\n", data, flags, uri.canon_len); 03415 03416 return uri.canon_len; 03417 } 03418 03419 /* Canonicalizes the URI data specified in the parse_data, using the given flags. If the 03420 * canonicalization succeededs it will store all the canonicalization information 03421 * in the pointer to the Uri. 03422 * 03423 * To canonicalize a URI this function first computes what the length of the URI 03424 * specified by the parse_data will be. Once this is done it will then perfom the actual 03425 * canonicalization of the URI. 03426 */ 03427 static HRESULT canonicalize_uri(const parse_data *data, Uri *uri, DWORD flags) { 03428 INT len; 03429 03430 uri->canon_uri = NULL; 03431 uri->canon_size = uri->canon_len = 0; 03432 03433 TRACE("(%p %p %x): beginning to canonicalize URI %s.\n", data, uri, flags, debugstr_w(data->uri)); 03434 03435 /* First try to compute the length of the URI. 
*/ 03436 len = compute_canonicalized_length(data, flags); 03437 if(len == -1) { 03438 ERR("(%p %p %x): Could not compute the canonicalized length of %s.\n", data, uri, flags, 03439 debugstr_w(data->uri)); 03440 return E_INVALIDARG; 03441 } 03442 03443 uri->canon_uri = heap_alloc((len+1)*sizeof(WCHAR)); 03444 if(!uri->canon_uri) 03445 return E_OUTOFMEMORY; 03446 03447 uri->canon_size = len; 03448 if(!canonicalize_scheme(data, uri, flags, FALSE)) { 03449 ERR("(%p %p %x): Unable to canonicalize the scheme of the URI.\n", data, uri, flags); 03450 return E_INVALIDARG; 03451 } 03452 uri->scheme_type = data->scheme_type; 03453 03454 if(!canonicalize_hierpart(data, uri, flags, FALSE)) { 03455 ERR("(%p %p %x): Unable to canonicalize the heirpart of the URI\n", data, uri, flags); 03456 return E_INVALIDARG; 03457 } 03458 03459 if(!canonicalize_query(data, uri, flags, FALSE)) { 03460 ERR("(%p %p %x): Unable to canonicalize query string of the URI.\n", 03461 data, uri, flags); 03462 return E_INVALIDARG; 03463 } 03464 03465 if(!canonicalize_fragment(data, uri, flags, FALSE)) { 03466 ERR("(%p %p %x): Unable to canonicalize fragment of the URI.\n", 03467 data, uri, flags); 03468 return E_INVALIDARG; 03469 } 03470 03471 /* There's a possibility we didn't use all the space we allocated 03472 * earlier. 03473 */ 03474 if(uri->canon_len < uri->canon_size) { 03475 /* This happens if the URI is hierarchical and dot 03476 * segments were removed from it's path. 03477 */ 03478 WCHAR *tmp = heap_realloc(uri->canon_uri, (uri->canon_len+1)*sizeof(WCHAR)); 03479 if(!tmp) 03480 return E_OUTOFMEMORY; 03481 03482 uri->canon_uri = tmp; 03483 uri->canon_size = uri->canon_len; 03484 } 03485 03486 uri->canon_uri[uri->canon_len] = '\0'; 03487 TRACE("(%p %p %x): finished canonicalizing the URI. 
uri=%s\n", data, uri, flags, debugstr_w(uri->canon_uri)); 03488 03489 return S_OK; 03490 } 03491 03492 static HRESULT get_builder_component(LPWSTR *component, DWORD *component_len, 03493 LPCWSTR source, DWORD source_len, 03494 LPCWSTR *output, DWORD *output_len) 03495 { 03496 if(!output_len) { 03497 if(output) 03498 *output = NULL; 03499 return E_POINTER; 03500 } 03501 03502 if(!output) { 03503 *output_len = 0; 03504 return E_POINTER; 03505 } 03506 03507 if(!(*component) && source) { 03508 /* Allocate 'component', and copy the contents from 'source' 03509 * into the new allocation. 03510 */ 03511 *component = heap_alloc((source_len+1)*sizeof(WCHAR)); 03512 if(!(*component)) 03513 return E_OUTOFMEMORY; 03514 03515 memcpy(*component, source, source_len*sizeof(WCHAR)); 03516 (*component)[source_len] = '\0'; 03517 *component_len = source_len; 03518 } 03519 03520 *output = *component; 03521 *output_len = *component_len; 03522 return *output ? S_OK : S_FALSE; 03523 } 03524 03525 /* Allocates 'component' and copies the string from 'new_value' into 'component'. 03526 * If 'prefix' is set and 'new_value' isn't NULL, then it checks if 'new_value' 03527 * starts with 'prefix'. If it doesn't then 'prefix' is prepended to 'component'. 03528 * 03529 * If everything is successful, then will set 'success_flag' in 'flags'. 
03530 */ 03531 static HRESULT set_builder_component(LPWSTR *component, DWORD *component_len, LPCWSTR new_value, 03532 WCHAR prefix, DWORD *flags, DWORD success_flag) 03533 { 03534 heap_free(*component); 03535 03536 if(!new_value) { 03537 *component = NULL; 03538 *component_len = 0; 03539 } else { 03540 BOOL add_prefix = FALSE; 03541 DWORD len = lstrlenW(new_value); 03542 DWORD pos = 0; 03543 03544 if(prefix && *new_value != prefix) { 03545 add_prefix = TRUE; 03546 *component = heap_alloc((len+2)*sizeof(WCHAR)); 03547 } else 03548 *component = heap_alloc((len+1)*sizeof(WCHAR)); 03549 03550 if(!(*component)) 03551 return E_OUTOFMEMORY; 03552 03553 if(add_prefix) 03554 (*component)[pos++] = prefix; 03555 03556 memcpy(*component+pos, new_value, (len+1)*sizeof(WCHAR)); 03557 *component_len = len+pos; 03558 } 03559 03560 *flags |= success_flag; 03561 return S_OK; 03562 } 03563 03564 static void reset_builder(UriBuilder *builder) { 03565 if(builder->uri) 03566 IUri_Release(&builder->uri->IUri_iface); 03567 builder->uri = NULL; 03568 03569 heap_free(builder->fragment); 03570 builder->fragment = NULL; 03571 builder->fragment_len = 0; 03572 03573 heap_free(builder->host); 03574 builder->host = NULL; 03575 builder->host_len = 0; 03576 03577 heap_free(builder->password); 03578 builder->password = NULL; 03579 builder->password_len = 0; 03580 03581 heap_free(builder->path); 03582 builder->path = NULL; 03583 builder->path_len = 0; 03584 03585 heap_free(builder->query); 03586 builder->query = NULL; 03587 builder->query_len = 0; 03588 03589 heap_free(builder->scheme); 03590 builder->scheme = NULL; 03591 builder->scheme_len = 0; 03592 03593 heap_free(builder->username); 03594 builder->username = NULL; 03595 builder->username_len = 0; 03596 03597 builder->has_port = FALSE; 03598 builder->port = 0; 03599 builder->modified_props = 0; 03600 } 03601 03602 static HRESULT validate_scheme_name(const UriBuilder *builder, parse_data *data, DWORD flags) { 03603 const WCHAR *component; 03604 
const WCHAR *ptr; 03605 const WCHAR **pptr; 03606 DWORD expected_len; 03607 03608 if(builder->scheme) { 03609 ptr = builder->scheme; 03610 expected_len = builder->scheme_len; 03611 } else if(builder->uri && builder->uri->scheme_start > -1) { 03612 ptr = builder->uri->canon_uri+builder->uri->scheme_start; 03613 expected_len = builder->uri->scheme_len; 03614 } else { 03615 static const WCHAR nullW[] = {0}; 03616 ptr = nullW; 03617 expected_len = 0; 03618 } 03619 03620 component = ptr; 03621 pptr = &ptr; 03622 if(parse_scheme(pptr, data, flags, ALLOW_NULL_TERM_SCHEME) && 03623 data->scheme_len == expected_len) { 03624 if(data->scheme) 03625 TRACE("(%p %p %x): Found valid scheme component %s len=%d.\n", builder, data, flags, 03626 debugstr_wn(data->scheme, data->scheme_len), data->scheme_len); 03627 } else { 03628 TRACE("(%p %p %x): Invalid scheme component found %s.\n", builder, data, flags, 03629 debugstr_wn(component, expected_len)); 03630 return INET_E_INVALID_URL; 03631 } 03632 03633 return S_OK; 03634 } 03635 03636 static HRESULT validate_username(const UriBuilder *builder, parse_data *data, DWORD flags) { 03637 const WCHAR *ptr; 03638 const WCHAR **pptr; 03639 DWORD expected_len; 03640 03641 if(builder->username) { 03642 ptr = builder->username; 03643 expected_len = builder->username_len; 03644 } else if(!(builder->modified_props & Uri_HAS_USER_NAME) && builder->uri && 03645 builder->uri->userinfo_start > -1 && builder->uri->userinfo_split != 0) { 03646 /* Just use the username from the base Uri. */ 03647 data->username = builder->uri->canon_uri+builder->uri->userinfo_start; 03648 data->username_len = (builder->uri->userinfo_split > -1) ? 
03649 builder->uri->userinfo_split : builder->uri->userinfo_len; 03650 ptr = NULL; 03651 } else { 03652 ptr = NULL; 03653 expected_len = 0; 03654 } 03655 03656 if(ptr) { 03657 const WCHAR *component = ptr; 03658 pptr = &ptr; 03659 if(parse_username(pptr, data, flags, ALLOW_NULL_TERM_USER_NAME) && 03660 data->username_len == expected_len) 03661 TRACE("(%p %p %x): Found valid username component %s len=%d.\n", builder, data, flags, 03662 debugstr_wn(data->username, data->username_len), data->username_len); 03663 else { 03664 TRACE("(%p %p %x): Invalid username component found %s.\n", builder, data, flags, 03665 debugstr_wn(component, expected_len)); 03666 return INET_E_INVALID_URL; 03667 } 03668 } 03669 03670 return S_OK; 03671 } 03672 03673 static HRESULT validate_password(const UriBuilder *builder, parse_data *data, DWORD flags) { 03674 const WCHAR *ptr; 03675 const WCHAR **pptr; 03676 DWORD expected_len; 03677 03678 if(builder->password) { 03679 ptr = builder->password; 03680 expected_len = builder->password_len; 03681 } else if(!(builder->modified_props & Uri_HAS_PASSWORD) && builder->uri && 03682 builder->uri->userinfo_split > -1) { 03683 data->password = builder->uri->canon_uri+builder->uri->userinfo_start+builder->uri->userinfo_split+1; 03684 data->password_len = builder->uri->userinfo_len-builder->uri->userinfo_split-1; 03685 ptr = NULL; 03686 } else { 03687 ptr = NULL; 03688 expected_len = 0; 03689 } 03690 03691 if(ptr) { 03692 const WCHAR *component = ptr; 03693 pptr = &ptr; 03694 if(parse_password(pptr, data, flags, ALLOW_NULL_TERM_PASSWORD) && 03695 data->password_len == expected_len) 03696 TRACE("(%p %p %x): Found valid password component %s len=%d.\n", builder, data, flags, 03697 debugstr_wn(data->password, data->password_len), data->password_len); 03698 else { 03699 TRACE("(%p %p %x): Invalid password component found %s.\n", builder, data, flags, 03700 debugstr_wn(component, expected_len)); 03701 return INET_E_INVALID_URL; 03702 } 03703 } 03704 03705 
return S_OK; 03706 } 03707 03708 static HRESULT validate_userinfo(const UriBuilder *builder, parse_data *data, DWORD flags) { 03709 HRESULT hr; 03710 03711 hr = validate_username(builder, data, flags); 03712 if(FAILED(hr)) 03713 return hr; 03714 03715 hr = validate_password(builder, data, flags); 03716 if(FAILED(hr)) 03717 return hr; 03718 03719 return S_OK; 03720 } 03721 03722 static HRESULT validate_host(const UriBuilder *builder, parse_data *data, DWORD flags) { 03723 const WCHAR *ptr; 03724 const WCHAR **pptr; 03725 DWORD expected_len; 03726 03727 if(builder->host) { 03728 ptr = builder->host; 03729 expected_len = builder->host_len; 03730 } else if(!(builder->modified_props & Uri_HAS_HOST) && builder->uri && builder->uri->host_start > -1) { 03731 ptr = builder->uri->canon_uri + builder->uri->host_start; 03732 expected_len = builder->uri->host_len; 03733 } else 03734 ptr = NULL; 03735 03736 if(ptr) { 03737 const WCHAR *component = ptr; 03738 DWORD extras = ALLOW_BRACKETLESS_IP_LITERAL|IGNORE_PORT_DELIMITER|SKIP_IP_FUTURE_CHECK; 03739 pptr = &ptr; 03740 03741 if(parse_host(pptr, data, flags, extras) && data->host_len == expected_len) 03742 TRACE("(%p %p %x): Found valid host name %s len=%d type=%d.\n", builder, data, flags, 03743 debugstr_wn(data->host, data->host_len), data->host_len, data->host_type); 03744 else { 03745 TRACE("(%p %p %x): Invalid host name found %s.\n", builder, data, flags, 03746 debugstr_wn(component, expected_len)); 03747 return INET_E_INVALID_URL; 03748 } 03749 } 03750 03751 return S_OK; 03752 } 03753 03754 static void setup_port(const UriBuilder *builder, parse_data *data, DWORD flags) { 03755 if(builder->modified_props & Uri_HAS_PORT) { 03756 if(builder->has_port) { 03757 data->has_port = TRUE; 03758 data->port_value = builder->port; 03759 } 03760 } else if(builder->uri && builder->uri->has_port) { 03761 data->has_port = TRUE; 03762 data->port_value = builder->uri->port; 03763 } 03764 03765 if(data->has_port) 03766 TRACE("(%p %p %x): 
Using %u as port for IUri.\n", builder, data, flags, data->port_value); 03767 } 03768 03769 static HRESULT validate_path(const UriBuilder *builder, parse_data *data, DWORD flags) { 03770 const WCHAR *ptr = NULL; 03771 const WCHAR *component; 03772 const WCHAR **pptr; 03773 DWORD expected_len; 03774 BOOL check_len = TRUE; 03775 BOOL valid = FALSE; 03776 03777 if(builder->path) { 03778 ptr = builder->path; 03779 expected_len = builder->path_len; 03780 } else if(!(builder->modified_props & Uri_HAS_PATH) && 03781 builder->uri && builder->uri->path_start > -1) { 03782 ptr = builder->uri->canon_uri+builder->uri->path_start; 03783 expected_len = builder->uri->path_len; 03784 } else { 03785 static const WCHAR nullW[] = {0}; 03786 ptr = nullW; 03787 check_len = FALSE; 03788 } 03789 03790 component = ptr; 03791 pptr = &ptr; 03792 03793 /* How the path is validated depends on what type of 03794 * URI it is. 03795 */ 03796 valid = data->is_opaque ? 03797 parse_path_opaque(pptr, data, flags) : parse_path_hierarchical(pptr, data, flags); 03798 03799 if(!valid || (check_len && expected_len != data->path_len)) { 03800 TRACE("(%p %p %x): Invalid path component %s.\n", builder, data, flags, 03801 debugstr_wn(component, check_len ? 
expected_len : -1) ); 03802 return INET_E_INVALID_URL; 03803 } 03804 03805 TRACE("(%p %p %x): Valid path component %s len=%d.\n", builder, data, flags, 03806 debugstr_wn(data->path, data->path_len), data->path_len); 03807 03808 return S_OK; 03809 } 03810 03811 static HRESULT validate_query(const UriBuilder *builder, parse_data *data, DWORD flags) { 03812 const WCHAR *ptr = NULL; 03813 const WCHAR **pptr; 03814 DWORD expected_len; 03815 03816 if(builder->query) { 03817 ptr = builder->query; 03818 expected_len = builder->query_len; 03819 } else if(!(builder->modified_props & Uri_HAS_QUERY) && builder->uri && 03820 builder->uri->query_start > -1) { 03821 ptr = builder->uri->canon_uri+builder->uri->query_start; 03822 expected_len = builder->uri->query_len; 03823 } 03824 03825 if(ptr) { 03826 const WCHAR *component = ptr; 03827 pptr = &ptr; 03828 03829 if(parse_query(pptr, data, flags) && expected_len == data->query_len) 03830 TRACE("(%p %p %x): Valid query component %s len=%d.\n", builder, data, flags, 03831 debugstr_wn(data->query, data->query_len), data->query_len); 03832 else { 03833 TRACE("(%p %p %x): Invalid query component %s.\n", builder, data, flags, 03834 debugstr_wn(component, expected_len)); 03835 return INET_E_INVALID_URL; 03836 } 03837 } 03838 03839 return S_OK; 03840 } 03841 03842 static HRESULT validate_fragment(const UriBuilder *builder, parse_data *data, DWORD flags) { 03843 const WCHAR *ptr = NULL; 03844 const WCHAR **pptr; 03845 DWORD expected_len; 03846 03847 if(builder->fragment) { 03848 ptr = builder->fragment; 03849 expected_len = builder->fragment_len; 03850 } else if(!(builder->modified_props & Uri_HAS_FRAGMENT) && builder->uri && 03851 builder->uri->fragment_start > -1) { 03852 ptr = builder->uri->canon_uri+builder->uri->fragment_start; 03853 expected_len = builder->uri->fragment_len; 03854 } 03855 03856 if(ptr) { 03857 const WCHAR *component = ptr; 03858 pptr = &ptr; 03859 03860 if(parse_fragment(pptr, data, flags) && expected_len == 
data->fragment_len) 03861 TRACE("(%p %p %x): Valid fragment component %s len=%d.\n", builder, data, flags, 03862 debugstr_wn(data->fragment, data->fragment_len), data->fragment_len); 03863 else { 03864 TRACE("(%p %p %x): Invalid fragment component %s.\n", builder, data, flags, 03865 debugstr_wn(component, expected_len)); 03866 return INET_E_INVALID_URL; 03867 } 03868 } 03869 03870 return S_OK; 03871 } 03872 03873 static HRESULT validate_components(const UriBuilder *builder, parse_data *data, DWORD flags) { 03874 HRESULT hr; 03875 03876 memset(data, 0, sizeof(parse_data)); 03877 03878 TRACE("(%p %p %x): Beginning to validate builder components.\n", builder, data, flags); 03879 03880 hr = validate_scheme_name(builder, data, flags); 03881 if(FAILED(hr)) 03882 return hr; 03883 03884 /* Extra validation for file schemes. */ 03885 if(data->scheme_type == URL_SCHEME_FILE) { 03886 if((builder->password || (builder->uri && builder->uri->userinfo_split > -1)) || 03887 (builder->username || (builder->uri && builder->uri->userinfo_start > -1))) { 03888 TRACE("(%p %p %x): File schemes can't contain a username or password.\n", 03889 builder, data, flags); 03890 return INET_E_INVALID_URL; 03891 } 03892 } 03893 03894 hr = validate_userinfo(builder, data, flags); 03895 if(FAILED(hr)) 03896 return hr; 03897 03898 hr = validate_host(builder, data, flags); 03899 if(FAILED(hr)) 03900 return hr; 03901 03902 setup_port(builder, data, flags); 03903 03904 /* The URI is opaque if it doesn't have an authority component. 
*/ 03905 if(!data->is_relative) 03906 data->is_opaque = !data->username && !data->password && !data->host && !data->has_port; 03907 else 03908 data->is_opaque = !data->host && !data->has_port; 03909 03910 hr = validate_path(builder, data, flags); 03911 if(FAILED(hr)) 03912 return hr; 03913 03914 hr = validate_query(builder, data, flags); 03915 if(FAILED(hr)) 03916 return hr; 03917 03918 hr = validate_fragment(builder, data, flags); 03919 if(FAILED(hr)) 03920 return hr; 03921 03922 TRACE("(%p %p %x): Finished validating builder components.\n", builder, data, flags); 03923 03924 return S_OK; 03925 } 03926 03927 static void convert_to_dos_path(const WCHAR *path, DWORD path_len, 03928 WCHAR *output, DWORD *output_len) 03929 { 03930 const WCHAR *ptr = path; 03931 03932 if(path_len > 3 && *ptr == '/' && is_drive_path(path+1)) 03933 /* Skip over the leading / before the drive path. */ 03934 ++ptr; 03935 03936 for(; ptr < path+path_len; ++ptr) { 03937 if(*ptr == '/') { 03938 if(output) 03939 *output++ = '\\'; 03940 (*output_len)++; 03941 } else { 03942 if(output) 03943 *output++ = *ptr; 03944 (*output_len)++; 03945 } 03946 } 03947 } 03948 03949 /* Generates a raw uri string using the parse_data. */ 03950 static DWORD generate_raw_uri(const parse_data *data, BSTR uri, DWORD flags) { 03951 DWORD length = 0; 03952 03953 if(data->scheme) { 03954 if(uri) { 03955 memcpy(uri, data->scheme, data->scheme_len*sizeof(WCHAR)); 03956 uri[data->scheme_len] = ':'; 03957 } 03958 length += data->scheme_len+1; 03959 } 03960 03961 if(!data->is_opaque) { 03962 /* For the "//" which appears before the authority component. */ 03963 if(uri) { 03964 uri[length] = '/'; 03965 uri[length+1] = '/'; 03966 } 03967 length += 2; 03968 03969 /* Check if we need to add the "\\" before the host name 03970 * of a UNC server name in a DOS path. 
03971 */ 03972 if(flags & RAW_URI_CONVERT_TO_DOS_PATH && 03973 data->scheme_type == URL_SCHEME_FILE && data->host) { 03974 if(uri) { 03975 uri[length] = '\\'; 03976 uri[length+1] = '\\'; 03977 } 03978 length += 2; 03979 } 03980 } 03981 03982 if(data->username) { 03983 if(uri) 03984 memcpy(uri+length, data->username, data->username_len*sizeof(WCHAR)); 03985 length += data->username_len; 03986 } 03987 03988 if(data->password) { 03989 if(uri) { 03990 uri[length] = ':'; 03991 memcpy(uri+length+1, data->password, data->password_len*sizeof(WCHAR)); 03992 } 03993 length += data->password_len+1; 03994 } 03995 03996 if(data->password || data->username) { 03997 if(uri) 03998 uri[length] = '@'; 03999 ++length; 04000 } 04001 04002 if(data->host) { 04003 /* IPv6 addresses get the brackets added around them if they don't already 04004 * have them. 04005 */ 04006 const BOOL add_brackets = data->host_type == Uri_HOST_IPV6 && *(data->host) != '['; 04007 if(add_brackets) { 04008 if(uri) 04009 uri[length] = '['; 04010 ++length; 04011 } 04012 04013 if(uri) 04014 memcpy(uri+length, data->host, data->host_len*sizeof(WCHAR)); 04015 length += data->host_len; 04016 04017 if(add_brackets) { 04018 if(uri) 04019 uri[length] = ']'; 04020 length++; 04021 } 04022 } 04023 04024 if(data->has_port) { 04025 /* The port isn't included in the raw uri if it's the default 04026 * port for the scheme type. 
04027 */ 04028 DWORD i; 04029 BOOL is_default = FALSE; 04030 04031 for(i = 0; i < sizeof(default_ports)/sizeof(default_ports[0]); ++i) { 04032 if(data->scheme_type == default_ports[i].scheme && 04033 data->port_value == default_ports[i].port) 04034 is_default = TRUE; 04035 } 04036 04037 if(!is_default || flags & RAW_URI_FORCE_PORT_DISP) { 04038 if(uri) 04039 uri[length] = ':'; 04040 ++length; 04041 04042 if(uri) 04043 length += ui2str(uri+length, data->port_value); 04044 else 04045 length += ui2str(NULL, data->port_value); 04046 } 04047 } 04048 04049 /* Check if a '/' should be added before the path for hierarchical URIs. */ 04050 if(!data->is_opaque && data->path && *(data->path) != '/') { 04051 if(uri) 04052 uri[length] = '/'; 04053 ++length; 04054 } 04055 04056 if(data->path) { 04057 if(!data->is_opaque && data->scheme_type == URL_SCHEME_FILE && 04058 flags & RAW_URI_CONVERT_TO_DOS_PATH) { 04059 DWORD len = 0; 04060 04061 if(uri) 04062 convert_to_dos_path(data->path, data->path_len, uri+length, &len); 04063 else 04064 convert_to_dos_path(data->path, data->path_len, NULL, &len); 04065 04066 length += len; 04067 } else { 04068 if(uri) 04069 memcpy(uri+length, data->path, data->path_len*sizeof(WCHAR)); 04070 length += data->path_len; 04071 } 04072 } 04073 04074 if(data->query) { 04075 if(uri) 04076 memcpy(uri+length, data->query, data->query_len*sizeof(WCHAR)); 04077 length += data->query_len; 04078 } 04079 04080 if(data->fragment) { 04081 if(uri) 04082 memcpy(uri+length, data->fragment, data->fragment_len*sizeof(WCHAR)); 04083 length += data->fragment_len; 04084 } 04085 04086 if(uri) 04087 TRACE("(%p %p): Generated raw uri=%s len=%d\n", data, uri, debugstr_wn(uri, length), length); 04088 else 04089 TRACE("(%p %p): Computed raw uri len=%d\n", data, uri, length); 04090 04091 return length; 04092 } 04093 04094 static HRESULT generate_uri(const UriBuilder *builder, const parse_data *data, Uri *uri, DWORD flags) { 04095 HRESULT hr; 04096 DWORD length = 
generate_raw_uri(data, NULL, 0); 04097 uri->raw_uri = SysAllocStringLen(NULL, length); 04098 if(!uri->raw_uri) 04099 return E_OUTOFMEMORY; 04100 04101 generate_raw_uri(data, uri->raw_uri, 0); 04102 04103 hr = canonicalize_uri(data, uri, flags); 04104 if(FAILED(hr)) { 04105 if(hr == E_INVALIDARG) 04106 return INET_E_INVALID_URL; 04107 return hr; 04108 } 04109 04110 uri->create_flags = flags; 04111 return S_OK; 04112 } 04113 04114 static inline Uri* impl_from_IUri(IUri *iface) 04115 { 04116 return CONTAINING_RECORD(iface, Uri, IUri_iface); 04117 } 04118 04119 static inline void destory_uri_obj(Uri *This) 04120 { 04121 SysFreeString(This->raw_uri); 04122 heap_free(This->canon_uri); 04123 heap_free(This); 04124 } 04125 04126 static HRESULT WINAPI Uri_QueryInterface(IUri *iface, REFIID riid, void **ppv) 04127 { 04128 Uri *This = impl_from_IUri(iface); 04129 04130 if(IsEqualGUID(&IID_IUnknown, riid)) { 04131 TRACE("(%p)->(IID_IUnknown %p)\n", This, ppv); 04132 *ppv = &This->IUri_iface; 04133 }else if(IsEqualGUID(&IID_IUri, riid)) { 04134 TRACE("(%p)->(IID_IUri %p)\n", This, ppv); 04135 *ppv = &This->IUri_iface; 04136 }else if(IsEqualGUID(&IID_IUriBuilderFactory, riid)) { 04137 TRACE("(%p)->(IID_IUriBuilderFactory %p)\n", This, riid); 04138 *ppv = &This->IUriBuilderFactory_iface; 04139 }else if(IsEqualGUID(&IID_IUriObj, riid)) { 04140 TRACE("(%p)->(IID_IUriObj %p)\n", This, ppv); 04141 *ppv = This; 04142 return S_OK; 04143 }else { 04144 TRACE("(%p)->(%s %p)\n", This, debugstr_guid(riid), ppv); 04145 *ppv = NULL; 04146 return E_NOINTERFACE; 04147 } 04148 04149 IUnknown_AddRef((IUnknown*)*ppv); 04150 return S_OK; 04151 } 04152 04153 static ULONG WINAPI Uri_AddRef(IUri *iface) 04154 { 04155 Uri *This = impl_from_IUri(iface); 04156 LONG ref = InterlockedIncrement(&This->ref); 04157 04158 TRACE("(%p) ref=%d\n", This, ref); 04159 04160 return ref; 04161 } 04162 04163 static ULONG WINAPI Uri_Release(IUri *iface) 04164 { 04165 Uri *This = impl_from_IUri(iface); 04166 LONG ref = 
InterlockedDecrement(&This->ref); 04167 04168 TRACE("(%p) ref=%d\n", This, ref); 04169 04170 if(!ref) 04171 destory_uri_obj(This); 04172 04173 return ref; 04174 } 04175 04176 static HRESULT WINAPI Uri_GetPropertyBSTR(IUri *iface, Uri_PROPERTY uriProp, BSTR *pbstrProperty, DWORD dwFlags) 04177 { 04178 Uri *This = impl_from_IUri(iface); 04179 HRESULT hres; 04180 TRACE("(%p)->(%d %p %x)\n", This, uriProp, pbstrProperty, dwFlags); 04181 04182 if(!pbstrProperty) 04183 return E_POINTER; 04184 04185 if(uriProp > Uri_PROPERTY_STRING_LAST) { 04186 /* Windows allocates an empty BSTR for invalid Uri_PROPERTY's. */ 04187 *pbstrProperty = SysAllocStringLen(NULL, 0); 04188 if(!(*pbstrProperty)) 04189 return E_OUTOFMEMORY; 04190 04191 /* It only returns S_FALSE for the ZONE property... */ 04192 if(uriProp == Uri_PROPERTY_ZONE) 04193 return S_FALSE; 04194 else 04195 return S_OK; 04196 } 04197 04198 /* Don't have support for flags yet. */ 04199 if(dwFlags) { 04200 FIXME("(%p)->(%d %p %x)\n", This, uriProp, pbstrProperty, dwFlags); 04201 return E_NOTIMPL; 04202 } 04203 04204 switch(uriProp) { 04205 case Uri_PROPERTY_ABSOLUTE_URI: 04206 if(This->display_modifiers & URI_DISPLAY_NO_ABSOLUTE_URI) { 04207 *pbstrProperty = SysAllocStringLen(NULL, 0); 04208 hres = S_FALSE; 04209 } else { 04210 if(This->scheme_type != URL_SCHEME_UNKNOWN && This->userinfo_start > -1) { 04211 if(This->userinfo_len == 0) { 04212 /* Don't include the '@' after the userinfo component. */ 04213 *pbstrProperty = SysAllocStringLen(NULL, This->canon_len-1); 04214 hres = S_OK; 04215 if(*pbstrProperty) { 04216 /* Copy everything before it. */ 04217 memcpy(*pbstrProperty, This->canon_uri, This->userinfo_start*sizeof(WCHAR)); 04218 04219 /* And everything after it. 
*/ 04220 memcpy(*pbstrProperty+This->userinfo_start, This->canon_uri+This->userinfo_start+1, 04221 (This->canon_len-This->userinfo_start-1)*sizeof(WCHAR)); 04222 } 04223 } else if(This->userinfo_split == 0 && This->userinfo_len == 1) { 04224 /* Don't include the ":@" */ 04225 *pbstrProperty = SysAllocStringLen(NULL, This->canon_len-2); 04226 hres = S_OK; 04227 if(*pbstrProperty) { 04228 memcpy(*pbstrProperty, This->canon_uri, This->userinfo_start*sizeof(WCHAR)); 04229 memcpy(*pbstrProperty+This->userinfo_start, This->canon_uri+This->userinfo_start+2, 04230 (This->canon_len-This->userinfo_start-2)*sizeof(WCHAR)); 04231 } 04232 } else { 04233 *pbstrProperty = SysAllocString(This->canon_uri); 04234 hres = S_OK; 04235 } 04236 } else { 04237 *pbstrProperty = SysAllocString(This->canon_uri); 04238 hres = S_OK; 04239 } 04240 } 04241 04242 if(!(*pbstrProperty)) 04243 hres = E_OUTOFMEMORY; 04244 04245 break; 04246 case Uri_PROPERTY_AUTHORITY: 04247 if(This->authority_start > -1) { 04248 if(This->port_offset > -1 && is_default_port(This->scheme_type, This->port) && 04249 This->display_modifiers & URI_DISPLAY_NO_DEFAULT_PORT_AUTH) 04250 /* Don't include the port in the authority component. */ 04251 *pbstrProperty = SysAllocStringLen(This->canon_uri+This->authority_start, This->port_offset); 04252 else 04253 *pbstrProperty = SysAllocStringLen(This->canon_uri+This->authority_start, This->authority_len); 04254 hres = S_OK; 04255 } else { 04256 *pbstrProperty = SysAllocStringLen(NULL, 0); 04257 hres = S_FALSE; 04258 } 04259 04260 if(!(*pbstrProperty)) 04261 hres = E_OUTOFMEMORY; 04262 04263 break; 04264 case Uri_PROPERTY_DISPLAY_URI: 04265 /* The Display URI contains everything except for the userinfo for known 04266 * scheme types. 
04267 */ 04268 if(This->scheme_type != URL_SCHEME_UNKNOWN && This->userinfo_start > -1) { 04269 *pbstrProperty = SysAllocStringLen(NULL, This->canon_len-This->userinfo_len); 04270 04271 if(*pbstrProperty) { 04272 /* Copy everything before the userinfo over. */ 04273 memcpy(*pbstrProperty, This->canon_uri, This->userinfo_start*sizeof(WCHAR)); 04274 /* Copy everything after the userinfo over. */ 04275 memcpy(*pbstrProperty+This->userinfo_start, 04276 This->canon_uri+This->userinfo_start+This->userinfo_len+1, 04277 (This->canon_len-(This->userinfo_start+This->userinfo_len+1))*sizeof(WCHAR)); 04278 } 04279 } else 04280 *pbstrProperty = SysAllocString(This->canon_uri); 04281 04282 if(!(*pbstrProperty)) 04283 hres = E_OUTOFMEMORY; 04284 else 04285 hres = S_OK; 04286 04287 break; 04288 case Uri_PROPERTY_DOMAIN: 04289 if(This->domain_offset > -1) { 04290 *pbstrProperty = SysAllocStringLen(This->canon_uri+This->host_start+This->domain_offset, 04291 This->host_len-This->domain_offset); 04292 hres = S_OK; 04293 } else { 04294 *pbstrProperty = SysAllocStringLen(NULL, 0); 04295 hres = S_FALSE; 04296 } 04297 04298 if(!(*pbstrProperty)) 04299 hres = E_OUTOFMEMORY; 04300 04301 break; 04302 case Uri_PROPERTY_EXTENSION: 04303 if(This->extension_offset > -1) { 04304 *pbstrProperty = SysAllocStringLen(This->canon_uri+This->path_start+This->extension_offset, 04305 This->path_len-This->extension_offset); 04306 hres = S_OK; 04307 } else { 04308 *pbstrProperty = SysAllocStringLen(NULL, 0); 04309 hres = S_FALSE; 04310 } 04311 04312 if(!(*pbstrProperty)) 04313 hres = E_OUTOFMEMORY; 04314 04315 break; 04316 case Uri_PROPERTY_FRAGMENT: 04317 if(This->fragment_start > -1) { 04318 *pbstrProperty = SysAllocStringLen(This->canon_uri+This->fragment_start, This->fragment_len); 04319 hres = S_OK; 04320 } else { 04321 *pbstrProperty = SysAllocStringLen(NULL, 0); 04322 hres = S_FALSE; 04323 } 04324 04325 if(!(*pbstrProperty)) 04326 hres = E_OUTOFMEMORY; 04327 04328 break; 04329 case Uri_PROPERTY_HOST: 
04330 if(This->host_start > -1) { 04331 /* The '[' and ']' aren't included for IPv6 addresses. */ 04332 if(This->host_type == Uri_HOST_IPV6) 04333 *pbstrProperty = SysAllocStringLen(This->canon_uri+This->host_start+1, This->host_len-2); 04334 else 04335 *pbstrProperty = SysAllocStringLen(This->canon_uri+This->host_start, This->host_len); 04336 04337 hres = S_OK; 04338 } else { 04339 *pbstrProperty = SysAllocStringLen(NULL, 0); 04340 hres = S_FALSE; 04341 } 04342 04343 if(!(*pbstrProperty)) 04344 hres = E_OUTOFMEMORY; 04345 04346 break; 04347 case Uri_PROPERTY_PASSWORD: 04348 if(This->userinfo_split > -1) { 04349 *pbstrProperty = SysAllocStringLen( 04350 This->canon_uri+This->userinfo_start+This->userinfo_split+1, 04351 This->userinfo_len-This->userinfo_split-1); 04352 hres = S_OK; 04353 } else { 04354 *pbstrProperty = SysAllocStringLen(NULL, 0); 04355 hres = S_FALSE; 04356 } 04357 04358 if(!(*pbstrProperty)) 04359 return E_OUTOFMEMORY; 04360 04361 break; 04362 case Uri_PROPERTY_PATH: 04363 if(This->path_start > -1) { 04364 *pbstrProperty = SysAllocStringLen(This->canon_uri+This->path_start, This->path_len); 04365 hres = S_OK; 04366 } else { 04367 *pbstrProperty = SysAllocStringLen(NULL, 0); 04368 hres = S_FALSE; 04369 } 04370 04371 if(!(*pbstrProperty)) 04372 hres = E_OUTOFMEMORY; 04373 04374 break; 04375 case Uri_PROPERTY_PATH_AND_QUERY: 04376 if(This->path_start > -1) { 04377 *pbstrProperty = SysAllocStringLen(This->canon_uri+This->path_start, This->path_len+This->query_len); 04378 hres = S_OK; 04379 } else if(This->query_start > -1) { 04380 *pbstrProperty = SysAllocStringLen(This->canon_uri+This->query_start, This->query_len); 04381 hres = S_OK; 04382 } else { 04383 *pbstrProperty = SysAllocStringLen(NULL, 0); 04384 hres = S_FALSE; 04385 } 04386 04387 if(!(*pbstrProperty)) 04388 hres = E_OUTOFMEMORY; 04389 04390 break; 04391 case Uri_PROPERTY_QUERY: 04392 if(This->query_start > -1) { 04393 *pbstrProperty = SysAllocStringLen(This->canon_uri+This->query_start, 
This->query_len); 04394 hres = S_OK; 04395 } else { 04396 *pbstrProperty = SysAllocStringLen(NULL, 0); 04397 hres = S_FALSE; 04398 } 04399 04400 if(!(*pbstrProperty)) 04401 hres = E_OUTOFMEMORY; 04402 04403 break; 04404 case Uri_PROPERTY_RAW_URI: 04405 *pbstrProperty = SysAllocString(This->raw_uri); 04406 if(!(*pbstrProperty)) 04407 hres = E_OUTOFMEMORY; 04408 else 04409 hres = S_OK; 04410 break; 04411 case Uri_PROPERTY_SCHEME_NAME: 04412 if(This->scheme_start > -1) { 04413 *pbstrProperty = SysAllocStringLen(This->canon_uri + This->scheme_start, This->scheme_len); 04414 hres = S_OK; 04415 } else { 04416 *pbstrProperty = SysAllocStringLen(NULL, 0); 04417 hres = S_FALSE; 04418 } 04419 04420 if(!(*pbstrProperty)) 04421 hres = E_OUTOFMEMORY; 04422 04423 break; 04424 case Uri_PROPERTY_USER_INFO: 04425 if(This->userinfo_start > -1) { 04426 *pbstrProperty = SysAllocStringLen(This->canon_uri+This->userinfo_start, This->userinfo_len); 04427 hres = S_OK; 04428 } else { 04429 *pbstrProperty = SysAllocStringLen(NULL, 0); 04430 hres = S_FALSE; 04431 } 04432 04433 if(!(*pbstrProperty)) 04434 hres = E_OUTOFMEMORY; 04435 04436 break; 04437 case Uri_PROPERTY_USER_NAME: 04438 if(This->userinfo_start > -1 && This->userinfo_split != 0) { 04439 /* If userinfo_split is set, that means a password exists 04440 * so the username is only from userinfo_start to userinfo_split. 
04441 */ 04442 if(This->userinfo_split > -1) { 04443 *pbstrProperty = SysAllocStringLen(This->canon_uri + This->userinfo_start, This->userinfo_split); 04444 hres = S_OK; 04445 } else { 04446 *pbstrProperty = SysAllocStringLen(This->canon_uri + This->userinfo_start, This->userinfo_len); 04447 hres = S_OK; 04448 } 04449 } else { 04450 *pbstrProperty = SysAllocStringLen(NULL, 0); 04451 hres = S_FALSE; 04452 } 04453 04454 if(!(*pbstrProperty)) 04455 return E_OUTOFMEMORY; 04456 04457 break; 04458 default: 04459 FIXME("(%p)->(%d %p %x)\n", This, uriProp, pbstrProperty, dwFlags); 04460 hres = E_NOTIMPL; 04461 } 04462 04463 return hres; 04464 } 04465 04466 static HRESULT WINAPI Uri_GetPropertyLength(IUri *iface, Uri_PROPERTY uriProp, DWORD *pcchProperty, DWORD dwFlags) 04467 { 04468 Uri *This = impl_from_IUri(iface); 04469 HRESULT hres; 04470 TRACE("(%p)->(%d %p %x)\n", This, uriProp, pcchProperty, dwFlags); 04471 04472 if(!pcchProperty) 04473 return E_INVALIDARG; 04474 04475 /* Can only return a length for a property if it's a string. */ 04476 if(uriProp > Uri_PROPERTY_STRING_LAST) 04477 return E_INVALIDARG; 04478 04479 /* Don't have support for flags yet. */ 04480 if(dwFlags) { 04481 FIXME("(%p)->(%d %p %x)\n", This, uriProp, pcchProperty, dwFlags); 04482 return E_NOTIMPL; 04483 } 04484 04485 switch(uriProp) { 04486 case Uri_PROPERTY_ABSOLUTE_URI: 04487 if(This->display_modifiers & URI_DISPLAY_NO_ABSOLUTE_URI) { 04488 *pcchProperty = 0; 04489 hres = S_FALSE; 04490 } else { 04491 if(This->scheme_type != URL_SCHEME_UNKNOWN) { 04492 if(This->userinfo_start > -1 && This->userinfo_len == 0) 04493 /* Don't include the '@' in the length. */ 04494 *pcchProperty = This->canon_len-1; 04495 else if(This->userinfo_start > -1 && This->userinfo_len == 1 && 04496 This->userinfo_split == 0) 04497 /* Don't include the ":@" in the length. 
*/ 04498 *pcchProperty = This->canon_len-2; 04499 else 04500 *pcchProperty = This->canon_len; 04501 } else 04502 *pcchProperty = This->canon_len; 04503 04504 hres = S_OK; 04505 } 04506 04507 break; 04508 case Uri_PROPERTY_AUTHORITY: 04509 if(This->port_offset > -1 && 04510 This->display_modifiers & URI_DISPLAY_NO_DEFAULT_PORT_AUTH && 04511 is_default_port(This->scheme_type, This->port)) 04512 /* Only count up until the port in the authority. */ 04513 *pcchProperty = This->port_offset; 04514 else 04515 *pcchProperty = This->authority_len; 04516 hres = (This->authority_start > -1) ? S_OK : S_FALSE; 04517 break; 04518 case Uri_PROPERTY_DISPLAY_URI: 04519 if(This->scheme_type != URL_SCHEME_UNKNOWN && This->userinfo_start > -1) 04520 *pcchProperty = This->canon_len-This->userinfo_len-1; 04521 else 04522 *pcchProperty = This->canon_len; 04523 04524 hres = S_OK; 04525 break; 04526 case Uri_PROPERTY_DOMAIN: 04527 if(This->domain_offset > -1) 04528 *pcchProperty = This->host_len - This->domain_offset; 04529 else 04530 *pcchProperty = 0; 04531 04532 hres = (This->domain_offset > -1) ? S_OK : S_FALSE; 04533 break; 04534 case Uri_PROPERTY_EXTENSION: 04535 if(This->extension_offset > -1) { 04536 *pcchProperty = This->path_len - This->extension_offset; 04537 hres = S_OK; 04538 } else { 04539 *pcchProperty = 0; 04540 hres = S_FALSE; 04541 } 04542 04543 break; 04544 case Uri_PROPERTY_FRAGMENT: 04545 *pcchProperty = This->fragment_len; 04546 hres = (This->fragment_start > -1) ? S_OK : S_FALSE; 04547 break; 04548 case Uri_PROPERTY_HOST: 04549 *pcchProperty = This->host_len; 04550 04551 /* '[' and ']' aren't included in the length. */ 04552 if(This->host_type == Uri_HOST_IPV6) 04553 *pcchProperty -= 2; 04554 04555 hres = (This->host_start > -1) ? S_OK : S_FALSE; 04556 break; 04557 case Uri_PROPERTY_PASSWORD: 04558 *pcchProperty = (This->userinfo_split > -1) ? This->userinfo_len-This->userinfo_split-1 : 0; 04559 hres = (This->userinfo_split > -1) ? 
S_OK : S_FALSE; 04560 break; 04561 case Uri_PROPERTY_PATH: 04562 *pcchProperty = This->path_len; 04563 hres = (This->path_start > -1) ? S_OK : S_FALSE; 04564 break; 04565 case Uri_PROPERTY_PATH_AND_QUERY: 04566 *pcchProperty = This->path_len+This->query_len; 04567 hres = (This->path_start > -1 || This->query_start > -1) ? S_OK : S_FALSE; 04568 break; 04569 case Uri_PROPERTY_QUERY: 04570 *pcchProperty = This->query_len; 04571 hres = (This->query_start > -1) ? S_OK : S_FALSE; 04572 break; 04573 case Uri_PROPERTY_RAW_URI: 04574 *pcchProperty = SysStringLen(This->raw_uri); 04575 hres = S_OK; 04576 break; 04577 case Uri_PROPERTY_SCHEME_NAME: 04578 *pcchProperty = This->scheme_len; 04579 hres = (This->scheme_start > -1) ? S_OK : S_FALSE; 04580 break; 04581 case Uri_PROPERTY_USER_INFO: 04582 *pcchProperty = This->userinfo_len; 04583 hres = (This->userinfo_start > -1) ? S_OK : S_FALSE; 04584 break; 04585 case Uri_PROPERTY_USER_NAME: 04586 *pcchProperty = (This->userinfo_split > -1) ? This->userinfo_split : This->userinfo_len; 04587 if(This->userinfo_split == 0) 04588 hres = S_FALSE; 04589 else 04590 hres = (This->userinfo_start > -1) ? S_OK : S_FALSE; 04591 break; 04592 default: 04593 FIXME("(%p)->(%d %p %x)\n", This, uriProp, pcchProperty, dwFlags); 04594 hres = E_NOTIMPL; 04595 } 04596 04597 return hres; 04598 } 04599 04600 static HRESULT WINAPI Uri_GetPropertyDWORD(IUri *iface, Uri_PROPERTY uriProp, DWORD *pcchProperty, DWORD dwFlags) 04601 { 04602 Uri *This = impl_from_IUri(iface); 04603 HRESULT hres; 04604 04605 TRACE("(%p)->(%d %p %x)\n", This, uriProp, pcchProperty, dwFlags); 04606 04607 if(!pcchProperty) 04608 return E_INVALIDARG; 04609 04610 /* Microsoft's implementation for the ZONE property of a URI seems to be lacking... 04611 * From what I can tell, instead of checking which URLZONE the URI belongs to it 04612 * simply assigns URLZONE_INVALID and returns E_NOTIMPL. This also applies to the GetZone 04613 * function. 
04614 */ 04615 if(uriProp == Uri_PROPERTY_ZONE) { 04616 *pcchProperty = URLZONE_INVALID; 04617 return E_NOTIMPL; 04618 } 04619 04620 if(uriProp < Uri_PROPERTY_DWORD_START) { 04621 *pcchProperty = 0; 04622 return E_INVALIDARG; 04623 } 04624 04625 switch(uriProp) { 04626 case Uri_PROPERTY_HOST_TYPE: 04627 *pcchProperty = This->host_type; 04628 hres = S_OK; 04629 break; 04630 case Uri_PROPERTY_PORT: 04631 if(!This->has_port) { 04632 *pcchProperty = 0; 04633 hres = S_FALSE; 04634 } else { 04635 *pcchProperty = This->port; 04636 hres = S_OK; 04637 } 04638 04639 break; 04640 case Uri_PROPERTY_SCHEME: 04641 *pcchProperty = This->scheme_type; 04642 hres = S_OK; 04643 break; 04644 default: 04645 FIXME("(%p)->(%d %p %x)\n", This, uriProp, pcchProperty, dwFlags); 04646 hres = E_NOTIMPL; 04647 } 04648 04649 return hres; 04650 } 04651 04652 static HRESULT WINAPI Uri_HasProperty(IUri *iface, Uri_PROPERTY uriProp, BOOL *pfHasProperty) 04653 { 04654 Uri *This = impl_from_IUri(iface); 04655 TRACE("(%p)->(%d %p)\n", This, uriProp, pfHasProperty); 04656 04657 if(!pfHasProperty) 04658 return E_INVALIDARG; 04659 04660 switch(uriProp) { 04661 case Uri_PROPERTY_ABSOLUTE_URI: 04662 *pfHasProperty = !(This->display_modifiers & URI_DISPLAY_NO_ABSOLUTE_URI); 04663 break; 04664 case Uri_PROPERTY_AUTHORITY: 04665 *pfHasProperty = This->authority_start > -1; 04666 break; 04667 case Uri_PROPERTY_DISPLAY_URI: 04668 *pfHasProperty = TRUE; 04669 break; 04670 case Uri_PROPERTY_DOMAIN: 04671 *pfHasProperty = This->domain_offset > -1; 04672 break; 04673 case Uri_PROPERTY_EXTENSION: 04674 *pfHasProperty = This->extension_offset > -1; 04675 break; 04676 case Uri_PROPERTY_FRAGMENT: 04677 *pfHasProperty = This->fragment_start > -1; 04678 break; 04679 case Uri_PROPERTY_HOST: 04680 *pfHasProperty = This->host_start > -1; 04681 break; 04682 case Uri_PROPERTY_PASSWORD: 04683 *pfHasProperty = This->userinfo_split > -1; 04684 break; 04685 case Uri_PROPERTY_PATH: 04686 *pfHasProperty = This->path_start > -1; 
04687 break; 04688 case Uri_PROPERTY_PATH_AND_QUERY: 04689 *pfHasProperty = (This->path_start > -1 || This->query_start > -1); 04690 break; 04691 case Uri_PROPERTY_QUERY: 04692 *pfHasProperty = This->query_start > -1; 04693 break; 04694 case Uri_PROPERTY_RAW_URI: 04695 *pfHasProperty = TRUE; 04696 break; 04697 case Uri_PROPERTY_SCHEME_NAME: 04698 *pfHasProperty = This->scheme_start > -1; 04699 break; 04700 case Uri_PROPERTY_USER_INFO: 04701 *pfHasProperty = This->userinfo_start > -1; 04702 break; 04703 case Uri_PROPERTY_USER_NAME: 04704 if(This->userinfo_split == 0) 04705 *pfHasProperty = FALSE; 04706 else 04707 *pfHasProperty = This->userinfo_start > -1; 04708 break; 04709 case Uri_PROPERTY_HOST_TYPE: 04710 *pfHasProperty = TRUE; 04711 break; 04712 case Uri_PROPERTY_PORT: 04713 *pfHasProperty = This->has_port; 04714 break; 04715 case Uri_PROPERTY_SCHEME: 04716 *pfHasProperty = TRUE; 04717 break; 04718 case Uri_PROPERTY_ZONE: 04719 *pfHasProperty = FALSE; 04720 break; 04721 default: 04722 FIXME("(%p)->(%d %p): Unsupported property type.\n", This, uriProp, pfHasProperty); 04723 return E_NOTIMPL; 04724 } 04725 04726 return S_OK; 04727 } 04728 04729 static HRESULT WINAPI Uri_GetAbsoluteUri(IUri *iface, BSTR *pstrAbsoluteUri) 04730 { 04731 TRACE("(%p)->(%p)\n", iface, pstrAbsoluteUri); 04732 return IUri_GetPropertyBSTR(iface, Uri_PROPERTY_ABSOLUTE_URI, pstrAbsoluteUri, 0); 04733 } 04734 04735 static HRESULT WINAPI Uri_GetAuthority(IUri *iface, BSTR *pstrAuthority) 04736 { 04737 TRACE("(%p)->(%p)\n", iface, pstrAuthority); 04738 return IUri_GetPropertyBSTR(iface, Uri_PROPERTY_AUTHORITY, pstrAuthority, 0); 04739 } 04740 04741 static HRESULT WINAPI Uri_GetDisplayUri(IUri *iface, BSTR *pstrDisplayUri) 04742 { 04743 TRACE("(%p)->(%p)\n", iface, pstrDisplayUri); 04744 return IUri_GetPropertyBSTR(iface, Uri_PROPERTY_DISPLAY_URI, pstrDisplayUri, 0); 04745 } 04746 04747 static HRESULT WINAPI Uri_GetDomain(IUri *iface, BSTR *pstrDomain) 04748 { 04749 TRACE("(%p)->(%p)\n", iface, 
pstrDomain); 04750 return IUri_GetPropertyBSTR(iface, Uri_PROPERTY_DOMAIN, pstrDomain, 0); 04751 } 04752 04753 static HRESULT WINAPI Uri_GetExtension(IUri *iface, BSTR *pstrExtension) 04754 { 04755 TRACE("(%p)->(%p)\n", iface, pstrExtension); 04756 return IUri_GetPropertyBSTR(iface, Uri_PROPERTY_EXTENSION, pstrExtension, 0); 04757 } 04758 04759 static HRESULT WINAPI Uri_GetFragment(IUri *iface, BSTR *pstrFragment) 04760 { 04761 TRACE("(%p)->(%p)\n", iface, pstrFragment); 04762 return IUri_GetPropertyBSTR(iface, Uri_PROPERTY_FRAGMENT, pstrFragment, 0); 04763 } 04764 04765 static HRESULT WINAPI Uri_GetHost(IUri *iface, BSTR *pstrHost) 04766 { 04767 TRACE("(%p)->(%p)\n", iface, pstrHost); 04768 return IUri_GetPropertyBSTR(iface, Uri_PROPERTY_HOST, pstrHost, 0); 04769 } 04770 04771 static HRESULT WINAPI Uri_GetPassword(IUri *iface, BSTR *pstrPassword) 04772 { 04773 TRACE("(%p)->(%p)\n", iface, pstrPassword); 04774 return IUri_GetPropertyBSTR(iface, Uri_PROPERTY_PASSWORD, pstrPassword, 0); 04775 } 04776 04777 static HRESULT WINAPI Uri_GetPath(IUri *iface, BSTR *pstrPath) 04778 { 04779 TRACE("(%p)->(%p)\n", iface, pstrPath); 04780 return IUri_GetPropertyBSTR(iface, Uri_PROPERTY_PATH, pstrPath, 0); 04781 } 04782 04783 static HRESULT WINAPI Uri_GetPathAndQuery(IUri *iface, BSTR *pstrPathAndQuery) 04784 { 04785 TRACE("(%p)->(%p)\n", iface, pstrPathAndQuery); 04786 return IUri_GetPropertyBSTR(iface, Uri_PROPERTY_PATH_AND_QUERY, pstrPathAndQuery, 0); 04787 } 04788 04789 static HRESULT WINAPI Uri_GetQuery(IUri *iface, BSTR *pstrQuery) 04790 { 04791 TRACE("(%p)->(%p)\n", iface, pstrQuery); 04792 return IUri_GetPropertyBSTR(iface, Uri_PROPERTY_QUERY, pstrQuery, 0); 04793 } 04794 04795 static HRESULT WINAPI Uri_GetRawUri(IUri *iface, BSTR *pstrRawUri) 04796 { 04797 TRACE("(%p)->(%p)\n", iface, pstrRawUri); 04798 return IUri_GetPropertyBSTR(iface, Uri_PROPERTY_RAW_URI, pstrRawUri, 0); 04799 } 04800 04801 static HRESULT WINAPI Uri_GetSchemeName(IUri *iface, BSTR *pstrSchemeName) 
04802 { 04803 TRACE("(%p)->(%p)\n", iface, pstrSchemeName); 04804 return IUri_GetPropertyBSTR(iface, Uri_PROPERTY_SCHEME_NAME, pstrSchemeName, 0); 04805 } 04806 04807 static HRESULT WINAPI Uri_GetUserInfo(IUri *iface, BSTR *pstrUserInfo) 04808 { 04809 TRACE("(%p)->(%p)\n", iface, pstrUserInfo); 04810 return IUri_GetPropertyBSTR(iface, Uri_PROPERTY_USER_INFO, pstrUserInfo, 0); 04811 } 04812 04813 static HRESULT WINAPI Uri_GetUserName(IUri *iface, BSTR *pstrUserName) 04814 { 04815 TRACE("(%p)->(%p)\n", iface, pstrUserName); 04816 return IUri_GetPropertyBSTR(iface, Uri_PROPERTY_USER_NAME, pstrUserName, 0); 04817 } 04818 04819 static HRESULT WINAPI Uri_GetHostType(IUri *iface, DWORD *pdwHostType) 04820 { 04821 TRACE("(%p)->(%p)\n", iface, pdwHostType); 04822 return IUri_GetPropertyDWORD(iface, Uri_PROPERTY_HOST_TYPE, pdwHostType, 0); 04823 } 04824 04825 static HRESULT WINAPI Uri_GetPort(IUri *iface, DWORD *pdwPort) 04826 { 04827 TRACE("(%p)->(%p)\n", iface, pdwPort); 04828 return IUri_GetPropertyDWORD(iface, Uri_PROPERTY_PORT, pdwPort, 0); 04829 } 04830 04831 static HRESULT WINAPI Uri_GetScheme(IUri *iface, DWORD *pdwScheme) 04832 { 04833 TRACE("(%p)->(%p)\n", iface, pdwScheme); 04834 return IUri_GetPropertyDWORD(iface, Uri_PROPERTY_SCHEME, pdwScheme, 0); 04835 } 04836 04837 static HRESULT WINAPI Uri_GetZone(IUri *iface, DWORD *pdwZone) 04838 { 04839 TRACE("(%p)->(%p)\n", iface, pdwZone); 04840 return IUri_GetPropertyDWORD(iface, Uri_PROPERTY_ZONE,pdwZone, 0); 04841 } 04842 04843 static HRESULT WINAPI Uri_GetProperties(IUri *iface, DWORD *pdwProperties) 04844 { 04845 Uri *This = impl_from_IUri(iface); 04846 TRACE("(%p)->(%p)\n", This, pdwProperties); 04847 04848 if(!pdwProperties) 04849 return E_INVALIDARG; 04850 04851 /* All URIs have these. 
*/ 04852 *pdwProperties = Uri_HAS_DISPLAY_URI|Uri_HAS_RAW_URI|Uri_HAS_SCHEME|Uri_HAS_HOST_TYPE; 04853 04854 if(!(This->display_modifiers & URI_DISPLAY_NO_ABSOLUTE_URI)) 04855 *pdwProperties |= Uri_HAS_ABSOLUTE_URI; 04856 04857 if(This->scheme_start > -1) 04858 *pdwProperties |= Uri_HAS_SCHEME_NAME; 04859 04860 if(This->authority_start > -1) { 04861 *pdwProperties |= Uri_HAS_AUTHORITY; 04862 if(This->userinfo_start > -1) { 04863 *pdwProperties |= Uri_HAS_USER_INFO; 04864 if(This->userinfo_split != 0) 04865 *pdwProperties |= Uri_HAS_USER_NAME; 04866 } 04867 if(This->userinfo_split > -1) 04868 *pdwProperties |= Uri_HAS_PASSWORD; 04869 if(This->host_start > -1) 04870 *pdwProperties |= Uri_HAS_HOST; 04871 if(This->domain_offset > -1) 04872 *pdwProperties |= Uri_HAS_DOMAIN; 04873 } 04874 04875 if(This->has_port) 04876 *pdwProperties |= Uri_HAS_PORT; 04877 if(This->path_start > -1) 04878 *pdwProperties |= Uri_HAS_PATH|Uri_HAS_PATH_AND_QUERY; 04879 if(This->query_start > -1) 04880 *pdwProperties |= Uri_HAS_QUERY|Uri_HAS_PATH_AND_QUERY; 04881 04882 if(This->extension_offset > -1) 04883 *pdwProperties |= Uri_HAS_EXTENSION; 04884 04885 if(This->fragment_start > -1) 04886 *pdwProperties |= Uri_HAS_FRAGMENT; 04887 04888 return S_OK; 04889 } 04890 04891 static HRESULT WINAPI Uri_IsEqual(IUri *iface, IUri *pUri, BOOL *pfEqual) 04892 { 04893 Uri *This = impl_from_IUri(iface); 04894 Uri *other; 04895 04896 TRACE("(%p)->(%p %p)\n", This, pUri, pfEqual); 04897 04898 if(!pfEqual) 04899 return E_POINTER; 04900 04901 if(!pUri) { 04902 *pfEqual = FALSE; 04903 04904 /* For some reason Windows returns S_OK here... */ 04905 return S_OK; 04906 } 04907 04908 /* Try to convert it to a Uri (allows for a more simple comparison). */ 04909 if((other = get_uri_obj(pUri))) 04910 *pfEqual = are_equal_simple(This, other); 04911 else { 04912 /* Do it the hard way. 
*/ 04913 FIXME("(%p)->(%p %p) No support for unknown IUri's yet.\n", iface, pUri, pfEqual); 04914 return E_NOTIMPL; 04915 } 04916 04917 return S_OK; 04918 } 04919 04920 static const IUriVtbl UriVtbl = { 04921 Uri_QueryInterface, 04922 Uri_AddRef, 04923 Uri_Release, 04924 Uri_GetPropertyBSTR, 04925 Uri_GetPropertyLength, 04926 Uri_GetPropertyDWORD, 04927 Uri_HasProperty, 04928 Uri_GetAbsoluteUri, 04929 Uri_GetAuthority, 04930 Uri_GetDisplayUri, 04931 Uri_GetDomain, 04932 Uri_GetExtension, 04933 Uri_GetFragment, 04934 Uri_GetHost, 04935 Uri_GetPassword, 04936 Uri_GetPath, 04937 Uri_GetPathAndQuery, 04938 Uri_GetQuery, 04939 Uri_GetRawUri, 04940 Uri_GetSchemeName, 04941 Uri_GetUserInfo, 04942 Uri_GetUserName, 04943 Uri_GetHostType, 04944 Uri_GetPort, 04945 Uri_GetScheme, 04946 Uri_GetZone, 04947 Uri_GetProperties, 04948 Uri_IsEqual 04949 }; 04950 04951 static inline Uri* impl_from_IUriBuilderFactory(IUriBuilderFactory *iface) 04952 { 04953 return CONTAINING_RECORD(iface, Uri, IUriBuilderFactory_iface); 04954 } 04955 04956 static HRESULT WINAPI UriBuilderFactory_QueryInterface(IUriBuilderFactory *iface, REFIID riid, void **ppv) 04957 { 04958 Uri *This = impl_from_IUriBuilderFactory(iface); 04959 04960 if(IsEqualGUID(&IID_IUnknown, riid)) { 04961 TRACE("(%p)->(IID_IUnknown %p)\n", This, ppv); 04962 *ppv = &This->IUriBuilderFactory_iface; 04963 }else if(IsEqualGUID(&IID_IUriBuilderFactory, riid)) { 04964 TRACE("(%p)->(IID_IUriBuilderFactory %p)\n", This, ppv); 04965 *ppv = &This->IUriBuilderFactory_iface; 04966 }else if(IsEqualGUID(&IID_IUri, riid)) { 04967 TRACE("(%p)->(IID_IUri %p)\n", This, ppv); 04968 *ppv = &This->IUri_iface; 04969 }else { 04970 TRACE("(%p)->(%s %p)\n", This, debugstr_guid(riid), ppv); 04971 *ppv = NULL; 04972 return E_NOINTERFACE; 04973 } 04974 04975 IUnknown_AddRef((IUnknown*)*ppv); 04976 return S_OK; 04977 } 04978 04979 static ULONG WINAPI UriBuilderFactory_AddRef(IUriBuilderFactory *iface) 04980 { 04981 Uri *This = 
impl_from_IUriBuilderFactory(iface); 04982 LONG ref = InterlockedIncrement(&This->ref); 04983 04984 TRACE("(%p) ref=%d\n", This, ref); 04985 04986 return ref; 04987 } 04988 04989 static ULONG WINAPI UriBuilderFactory_Release(IUriBuilderFactory *iface) 04990 { 04991 Uri *This = impl_from_IUriBuilderFactory(iface); 04992 LONG ref = InterlockedDecrement(&This->ref); 04993 04994 TRACE("(%p) ref=%d\n", This, ref); 04995 04996 if(!ref) 04997 destory_uri_obj(This); 04998 04999 return ref; 05000 } 05001 05002 static HRESULT WINAPI UriBuilderFactory_CreateIUriBuilder(IUriBuilderFactory *iface, 05003 DWORD dwFlags, 05004 DWORD_PTR dwReserved, 05005 IUriBuilder **ppIUriBuilder) 05006 { 05007 Uri *This = impl_from_IUriBuilderFactory(iface); 05008 TRACE("(%p)->(%08x %08x %p)\n", This, dwFlags, (DWORD)dwReserved, ppIUriBuilder); 05009 05010 if(!ppIUriBuilder) 05011 return E_POINTER; 05012 05013 if(dwFlags || dwReserved) { 05014 *ppIUriBuilder = NULL; 05015 return E_INVALIDARG; 05016 } 05017 05018 return CreateIUriBuilder(NULL, 0, 0, ppIUriBuilder); 05019 } 05020 05021 static HRESULT WINAPI UriBuilderFactory_CreateInitializedIUriBuilder(IUriBuilderFactory *iface, 05022 DWORD dwFlags, 05023 DWORD_PTR dwReserved, 05024 IUriBuilder **ppIUriBuilder) 05025 { 05026 Uri *This = impl_from_IUriBuilderFactory(iface); 05027 TRACE("(%p)->(%08x %08x %p)\n", This, dwFlags, (DWORD)dwReserved, ppIUriBuilder); 05028 05029 if(!ppIUriBuilder) 05030 return E_POINTER; 05031 05032 if(dwFlags || dwReserved) { 05033 *ppIUriBuilder = NULL; 05034 return E_INVALIDARG; 05035 } 05036 05037 return CreateIUriBuilder(&This->IUri_iface, 0, 0, ppIUriBuilder); 05038 } 05039 05040 static const IUriBuilderFactoryVtbl UriBuilderFactoryVtbl = { 05041 UriBuilderFactory_QueryInterface, 05042 UriBuilderFactory_AddRef, 05043 UriBuilderFactory_Release, 05044 UriBuilderFactory_CreateIUriBuilder, 05045 UriBuilderFactory_CreateInitializedIUriBuilder 05046 }; 05047 05048 static Uri* create_uri_obj(void) { 05049 Uri *ret = 
heap_alloc_zero(sizeof(Uri)); 05050 if(ret) { 05051 ret->IUri_iface.lpVtbl = &UriVtbl; 05052 ret->IUriBuilderFactory_iface.lpVtbl = &UriBuilderFactoryVtbl; 05053 ret->ref = 1; 05054 } 05055 05056 return ret; 05057 } 05058 05059 /*********************************************************************** 05060 * CreateUri (urlmon.@) 05061 * 05062 * Creates a new IUri object using the URI represented by pwzURI. This function 05063 * parses and validates the components of pwzURI and then canonicalizes the 05064 * parsed components. 05065 * 05066 * PARAMS 05067 * pwzURI [I] The URI to parse, validate, and canonicalize. 05068 * dwFlags [I] Flags which can affect how the parsing/canonicalization is performed. 05069 * dwReserved [I] Reserved (not used). 05070 * ppURI [O] The resulting IUri after parsing/canonicalization occurs. 05071 * 05072 * RETURNS 05073 * Success: Returns S_OK. ppURI contains the pointer to the newly allocated IUri. 05074 * Failure: E_INVALIDARG if there's invalid flag combinations in dwFlags, or an 05075 * invalid parameters, or pwzURI doesn't represnt a valid URI. 05076 * E_OUTOFMEMORY if any memory allocation fails. 05077 * 05078 * NOTES 05079 * Default flags: 05080 * Uri_CREATE_CANONICALIZE, Uri_CREATE_DECODE_EXTRA_INFO, Uri_CREATE_CRACK_UNKNOWN_SCHEMES, 05081 * Uri_CREATE_PRE_PROCESS_HTML_URI, Uri_CREATE_NO_IE_SETTINGS. 
05082 */ 05083 HRESULT WINAPI CreateUri(LPCWSTR pwzURI, DWORD dwFlags, DWORD_PTR dwReserved, IUri **ppURI) 05084 { 05085 const DWORD supported_flags = Uri_CREATE_ALLOW_RELATIVE|Uri_CREATE_ALLOW_IMPLICIT_WILDCARD_SCHEME| 05086 Uri_CREATE_ALLOW_IMPLICIT_FILE_SCHEME|Uri_CREATE_NO_CANONICALIZE|Uri_CREATE_CANONICALIZE| 05087 Uri_CREATE_DECODE_EXTRA_INFO|Uri_CREATE_NO_DECODE_EXTRA_INFO|Uri_CREATE_CRACK_UNKNOWN_SCHEMES| 05088 Uri_CREATE_NO_CRACK_UNKNOWN_SCHEMES|Uri_CREATE_PRE_PROCESS_HTML_URI|Uri_CREATE_NO_PRE_PROCESS_HTML_URI| 05089 Uri_CREATE_NO_IE_SETTINGS|Uri_CREATE_NO_ENCODE_FORBIDDEN_CHARACTERS|Uri_CREATE_FILE_USE_DOS_PATH; 05090 Uri *ret; 05091 HRESULT hr; 05092 parse_data data; 05093 05094 TRACE("(%s %x %x %p)\n", debugstr_w(pwzURI), dwFlags, (DWORD)dwReserved, ppURI); 05095 05096 if(!ppURI) 05097 return E_INVALIDARG; 05098 05099 if(!pwzURI || !*pwzURI) { 05100 *ppURI = NULL; 05101 return E_INVALIDARG; 05102 } 05103 05104 /* Check for invalid flags. */ 05105 if(has_invalid_flag_combination(dwFlags)) { 05106 *ppURI = NULL; 05107 return E_INVALIDARG; 05108 } 05109 05110 /* Currently unsupported. */ 05111 if(dwFlags & ~supported_flags) 05112 FIXME("Ignoring unsupported flag(s) %x\n", dwFlags & ~supported_flags); 05113 05114 ret = create_uri_obj(); 05115 if(!ret) { 05116 *ppURI = NULL; 05117 return E_OUTOFMEMORY; 05118 } 05119 05120 /* Explicitly set the default flags if it doesn't cause a flag conflict. */ 05121 apply_default_flags(&dwFlags); 05122 05123 /* Pre process the URI, unless told otherwise. */ 05124 if(!(dwFlags & Uri_CREATE_NO_PRE_PROCESS_HTML_URI)) 05125 ret->raw_uri = pre_process_uri(pwzURI); 05126 else 05127 ret->raw_uri = SysAllocString(pwzURI); 05128 05129 if(!ret->raw_uri) { 05130 heap_free(ret); 05131 return E_OUTOFMEMORY; 05132 } 05133 05134 memset(&data, 0, sizeof(parse_data)); 05135 data.uri = ret->raw_uri; 05136 05137 /* Validate and parse the URI into it's components. 
*/ 05138 if(!parse_uri(&data, dwFlags)) { 05139 /* Encountered an unsupported or invalid URI */ 05140 IUri_Release(&ret->IUri_iface); 05141 *ppURI = NULL; 05142 return E_INVALIDARG; 05143 } 05144 05145 /* Canonicalize the URI. */ 05146 hr = canonicalize_uri(&data, ret, dwFlags); 05147 if(FAILED(hr)) { 05148 IUri_Release(&ret->IUri_iface); 05149 *ppURI = NULL; 05150 return hr; 05151 } 05152 05153 /* Remember the flags (defaults already applied) this Uri was created with. */ ret->create_flags = dwFlags; 05154 05155 *ppURI = &ret->IUri_iface; 05156 return S_OK; 05157 } 05158 05159 /*********************************************************************** 05160 * CreateUriWithFragment (urlmon.@) 05161 * 05162 * Creates a new IUri object. This is almost the same as CreateUri, except that 05163 * it allows you to explicitly specify a fragment (pwzFragment) for pwzURI. 05164 * 05165 * PARAMS 05166 * pwzURI [I] The URI to parse and perform canonicalization on. 05167 * pwzFragment [I] The explicit fragment string which should be added to pwzURI. 05168 * dwFlags [I] The flags which will be passed to CreateUri. 05169 * dwReserved [I] Reserved (not used). 05170 * ppURI [O] The resulting IUri after parsing/canonicalization. 05171 * 05172 * RETURNS 05173 * Success: S_OK. ppURI contains the pointer to the newly allocated IUri. 05174 * Failure: E_INVALIDARG if pwzURI already contains a fragment and pwzFragment 05175 * isn't NULL. Will also return E_INVALIDARG for the same reasons as 05176 * CreateUri will. E_OUTOFMEMORY if any allocations fail. 05177 */ 05178 HRESULT WINAPI CreateUriWithFragment(LPCWSTR pwzURI, LPCWSTR pwzFragment, DWORD dwFlags, 05179 DWORD_PTR dwReserved, IUri **ppURI) 05180 { 05181 HRESULT hres; 05182 TRACE("(%s %s %x %x %p)\n", debugstr_w(pwzURI), debugstr_w(pwzFragment), dwFlags, (DWORD)dwReserved, ppURI); 05183 05184 if(!ppURI) 05185 return E_INVALIDARG; 05186 05187 /* Only a NULL URI is rejected at this point; the string itself is validated by the CreateUri call this function forwards to. */ if(!pwzURI) { 05188 *ppURI = NULL; 05189 return E_INVALIDARG; 05190 } 05191 05192 /* Check if a fragment should be appended to the URI string. 
*/ 05193 if(pwzFragment) { 05194 WCHAR *uriW; 05195 DWORD uri_len, frag_len; 05196 BOOL add_pound; 05197 05198 /* Check if the original URI already has a fragment component. */ 05199 if(StrChrW(pwzURI, '#')) { 05200 *ppURI = NULL; 05201 return E_INVALIDARG; 05202 } 05203 05204 uri_len = lstrlenW(pwzURI); 05205 frag_len = lstrlenW(pwzFragment); 05206 05207 /* If the fragment doesn't start with a '#', one will be added. */ 05208 add_pound = *pwzFragment != '#'; 05209 05210 /* Room for both strings, the terminating NUL and, if needed, the extra '#'. */ if(add_pound) 05211 uriW = heap_alloc((uri_len+frag_len+2)*sizeof(WCHAR)); 05212 else 05213 uriW = heap_alloc((uri_len+frag_len+1)*sizeof(WCHAR)); 05214 05215 if(!uriW) { /* Clear the out parameter like the other failure paths of this function do. */ *ppURI = NULL; 05216 return E_OUTOFMEMORY; } 05217 05218 memcpy(uriW, pwzURI, uri_len*sizeof(WCHAR)); 05219 if(add_pound) 05220 uriW[uri_len++] = '#'; 05221 /* frag_len+1 also copies the fragment's terminating NUL. */ memcpy(uriW+uri_len, pwzFragment, (frag_len+1)*sizeof(WCHAR)); 05222 05223 hres = CreateUri(uriW, dwFlags, 0, ppURI); 05224 05225 heap_free(uriW); 05226 } else 05227 /* A fragment string wasn't specified, so just forward the call. */ 05228 hres = CreateUri(pwzURI, dwFlags, 0, ppURI); 05229 05230 return hres; 05231 } 05232 05233 /* Shared worker behind the IUriBuilder CreateUri* methods: produces an IUri in *uri from the builder's state. Returns E_POINTER for a NULL 'uri', E_NOTIMPL when an encoding mask is combined with a missing or modified base Uri, E_INVALIDARG for conflicting create_flags. */ static HRESULT build_uri(const UriBuilder *builder, IUri **uri, DWORD create_flags, 05234 DWORD use_orig_flags, DWORD encoding_mask) 05235 { 05236 HRESULT hr; 05237 parse_data data; 05238 Uri *ret; 05239 05240 if(!uri) 05241 return E_POINTER; 05242 05243 if(encoding_mask && (!builder->uri || builder->modified_props)) { 05244 *uri = NULL; 05245 return E_NOTIMPL; 05246 } 05247 05248 /* Decide what flags should be used when creating the Uri. */ 05249 if((use_orig_flags & UriBuilder_USE_ORIGINAL_FLAGS) && builder->uri) 05250 create_flags = builder->uri->create_flags; 05251 else { 05252 if(has_invalid_flag_combination(create_flags)) { 05253 *uri = NULL; 05254 return E_INVALIDARG; 05255 } 05256 05257 /* Set the default flags if they don't cause a conflict. 
*/ 05258 apply_default_flags(&create_flags); 05259 } 05260 05261 /* Return the base IUri if no changes have been made and the create_flags match. */ 05262 if(builder->uri && !builder->modified_props && builder->uri->create_flags == create_flags) { 05263 *uri = &builder->uri->IUri_iface; 05264 /* The caller gets its own reference to the shared base IUri. */ IUri_AddRef(*uri); 05265 return S_OK; 05266 } 05267 05268 /* No shortcut possible: validate the builder's components, then generate a new Uri object from them. */ hr = validate_components(builder, &data, create_flags); 05269 if(FAILED(hr)) { 05270 *uri = NULL; 05271 return hr; 05272 } 05273 05274 ret = create_uri_obj(); 05275 if(!ret) { 05276 *uri = NULL; 05277 return E_OUTOFMEMORY; 05278 } 05279 05280 hr = generate_uri(builder, &data, ret, create_flags); 05281 if(FAILED(hr)) { 05282 IUri_Release(&ret->IUri_iface); 05283 *uri = NULL; 05284 return hr; 05285 } 05286 05287 *uri = &ret->IUri_iface; 05288 return S_OK; 05289 } 05290 05291 /* Maps an IUriBuilder interface pointer back to the UriBuilder that embeds it. */ static inline UriBuilder* impl_from_IUriBuilder(IUriBuilder *iface) 05292 { 05293 return CONTAINING_RECORD(iface, UriBuilder, IUriBuilder_iface); 05294 } 05295 05296 /* Answers IID_IUnknown and IID_IUriBuilder with the same interface pointer (plus a reference); any other IID yields NULL and E_NOINTERFACE. */ static HRESULT WINAPI UriBuilder_QueryInterface(IUriBuilder *iface, REFIID riid, void **ppv) 05297 { 05298 UriBuilder *This = impl_from_IUriBuilder(iface); 05299 05300 if(IsEqualGUID(&IID_IUnknown, riid)) { 05301 TRACE("(%p)->(IID_IUnknown %p)\n", This, ppv); 05302 *ppv = &This->IUriBuilder_iface; 05303 }else if(IsEqualGUID(&IID_IUriBuilder, riid)) { 05304 TRACE("(%p)->(IID_IUriBuilder %p)\n", This, ppv); 05305 *ppv = &This->IUriBuilder_iface; 05306 }else { 05307 TRACE("(%p)->(%s %p)\n", This, debugstr_guid(riid), ppv); 05308 *ppv = NULL; 05309 return E_NOINTERFACE; 05310 } 05311 05312 IUnknown_AddRef((IUnknown*)*ppv); 05313 return S_OK; 05314 } 05315 05316 /* Interlocked increment of the reference count; returns the new count. */ static ULONG WINAPI UriBuilder_AddRef(IUriBuilder *iface) 05317 { 05318 UriBuilder *This = impl_from_IUriBuilder(iface); 05319 LONG ref = InterlockedIncrement(&This->ref); 05320 05321 TRACE("(%p) ref=%d\n", This, ref); 05322 05323 return ref; 05324 } 05325 05326 /* Interlocked decrement of the reference count; the builder is torn down when the count reaches zero. */ static ULONG WINAPI UriBuilder_Release(IUriBuilder *iface) 05327 { 05328 
UriBuilder *This = impl_from_IUriBuilder(iface); 05329 LONG ref = InterlockedDecrement(&This->ref); 05330 05331 TRACE("(%p) ref=%d\n", This, ref); 05332 05333 /* Last reference gone: drop the base Uri (if any) and free every component buffer, then the builder itself. */ if(!ref) { 05334 if(This->uri) IUri_Release(&This->uri->IUri_iface); 05335 heap_free(This->fragment); 05336 heap_free(This->host); 05337 heap_free(This->password); 05338 heap_free(This->path); 05339 heap_free(This->query); 05340 heap_free(This->scheme); 05341 heap_free(This->username); 05342 heap_free(This); 05343 } 05344 05345 return ref; 05346 } 05347 05348 /* Builds an IUri through build_uri(), passing no create flags and requesting the base Uri's original flags. */ static HRESULT WINAPI UriBuilder_CreateUriSimple(IUriBuilder *iface, 05349 DWORD dwAllowEncodingPropertyMask, 05350 DWORD_PTR dwReserved, 05351 IUri **ppIUri) 05352 { 05353 UriBuilder *This = impl_from_IUriBuilder(iface); 05354 HRESULT hr; 05355 TRACE("(%p)->(%d %d %p)\n", This, dwAllowEncodingPropertyMask, (DWORD)dwReserved, ppIUri); 05356 05357 hr = build_uri(This, ppIUri, 0, UriBuilder_USE_ORIGINAL_FLAGS, dwAllowEncodingPropertyMask); 05358 /* Only the not-implemented case is escalated to a FIXME. */ if(hr == E_NOTIMPL) 05359 FIXME("(%p)->(%d %d %p)\n", This, dwAllowEncodingPropertyMask, (DWORD)dwReserved, ppIUri); 05360 return hr; 05361 } 05362 05363 /* Builds an IUri through build_uri() with caller-supplied create flags. */ static HRESULT WINAPI UriBuilder_CreateUri(IUriBuilder *iface, 05364 DWORD dwCreateFlags, 05365 DWORD dwAllowEncodingPropertyMask, 05366 DWORD_PTR dwReserved, 05367 IUri **ppIUri) 05368 { 05369 UriBuilder *This = impl_from_IUriBuilder(iface); 05370 HRESULT hr; 05371 TRACE("(%p)->(0x%08x %d %d %p)\n", This, dwCreateFlags, dwAllowEncodingPropertyMask, (DWORD)dwReserved, ppIUri); 05372 05373 /* A flags value of -1 selects the base Uri's original flags; anything else is passed on as the create flags. */ if(dwCreateFlags == -1) 05374 hr = build_uri(This, ppIUri, 0, UriBuilder_USE_ORIGINAL_FLAGS, dwAllowEncodingPropertyMask); 05375 else 05376 hr = build_uri(This, ppIUri, dwCreateFlags, 0, dwAllowEncodingPropertyMask); 05377 05378 if(hr == E_NOTIMPL) 05379 FIXME("(%p)->(0x%08x %d %d %p)\n", This, dwCreateFlags, dwAllowEncodingPropertyMask, (DWORD)dwReserved, ppIUri); 05380 return hr; 05381 } 05382 05383 /* Builds an IUri through build_uri() with both the create flags and the builder flags supplied by the caller. */ static HRESULT WINAPI UriBuilder_CreateUriWithFlags(IUriBuilder *iface, 05384 
DWORD dwCreateFlags, 05385 DWORD dwUriBuilderFlags, 05386 DWORD dwAllowEncodingPropertyMask, 05387 DWORD_PTR dwReserved, 05388 IUri **ppIUri) 05389 { 05390 UriBuilder *This = impl_from_IUriBuilder(iface); 05391 HRESULT hr; 05392 TRACE("(%p)->(0x%08x 0x%08x %d %d %p)\n", This, dwCreateFlags, dwUriBuilderFlags, 05393 dwAllowEncodingPropertyMask, (DWORD)dwReserved, ppIUri); 05394 05395 /* Both flag sets are handed to build_uri() unchanged. */ hr = build_uri(This, ppIUri, dwCreateFlags, dwUriBuilderFlags, dwAllowEncodingPropertyMask); 05396 if(hr == E_NOTIMPL) 05397 FIXME("(%p)->(0x%08x 0x%08x %d %d %p)\n", This, dwCreateFlags, dwUriBuilderFlags, 05398 dwAllowEncodingPropertyMask, (DWORD)dwReserved, ppIUri); 05399 return hr; 05400 } 05401 05402 /* Returns the builder's base IUri with an added reference, or NULL when there is none; S_OK in both cases, E_POINTER for a NULL out pointer. */ static HRESULT WINAPI UriBuilder_GetIUri(IUriBuilder *iface, IUri **ppIUri) 05403 { 05404 UriBuilder *This = impl_from_IUriBuilder(iface); 05405 TRACE("(%p)->(%p)\n", This, ppIUri); 05406 05407 if(!ppIUri) 05408 return E_POINTER; 05409 05410 if(This->uri) { 05411 IUri *uri = &This->uri->IUri_iface; 05412 IUri_AddRef(uri); 05413 *ppIUri = uri; 05414 } else 05415 *ppIUri = NULL; 05416 05417 return S_OK; 05418 } 05419 05420 /* Replaces the builder's base IUri. Only IUri objects that get_uri_obj() recognises are accepted; others yield E_NOTIMPL. */ static HRESULT WINAPI UriBuilder_SetIUri(IUriBuilder *iface, IUri *pIUri) 05421 { 05422 UriBuilder *This = impl_from_IUriBuilder(iface); 05423 TRACE("(%p)->(%p)\n", This, pIUri); 05424 05425 if(pIUri) { 05426 Uri *uri; 05427 05428 if((uri = get_uri_obj(pIUri))) { 05429 /* Only reset the builder if its Uri isn't the same as 05430 * the Uri passed to the function. 05431 */ 05432 if(This->uri != uri) { 05433 reset_builder(This); 05434 05435 This->uri = uri; 05436 /* Only the port value is copied; has_port is not assigned here. */ if(uri->has_port) 05437 This->port = uri->port; 05438 05439 /* The builder now holds its own reference to the new base Uri. */ IUri_AddRef(pIUri); 05440 } 05441 } else { 05442 FIXME("(%p)->(%p) Unknown IUri types not supported yet.\n", This, pIUri); 05443 return E_NOTIMPL; 05444 } 05445 } else if(This->uri) 05446 /* Only reset the builder if it's Uri isn't NULL. 
*/ 05447 reset_builder(This); 05448 05449 return S_OK; 05450 } 05451 05452 /* Fragment getter. Delegates to get_builder_component(); the base Uri's fragment slice is offered as the source only when a base Uri exists, it has a fragment, and the fragment has not been modified on the builder. */ static HRESULT WINAPI UriBuilder_GetFragment(IUriBuilder *iface, DWORD *pcchFragment, LPCWSTR *ppwzFragment) 05453 { 05454 UriBuilder *This = impl_from_IUriBuilder(iface); 05455 TRACE("(%p)->(%p %p)\n", This, pcchFragment, ppwzFragment); 05456 05457 if(!This->uri || This->uri->fragment_start == -1 || This->modified_props & Uri_HAS_FRAGMENT) 05458 return get_builder_component(&This->fragment, &This->fragment_len, NULL, 0, ppwzFragment, pcchFragment); 05459 else 05460 return get_builder_component(&This->fragment, &This->fragment_len, This->uri->canon_uri+This->uri->fragment_start, 05461 This->uri->fragment_len, ppwzFragment, pcchFragment); 05462 } 05463 05464 /* Host getter. Same pattern as the other component getters: no source string is passed when there is no base Uri, no host in it, or the host was modified on the builder. */ static HRESULT WINAPI UriBuilder_GetHost(IUriBuilder *iface, DWORD *pcchHost, LPCWSTR *ppwzHost) 05465 { 05466 UriBuilder *This = impl_from_IUriBuilder(iface); 05467 TRACE("(%p)->(%p %p)\n", This, pcchHost, ppwzHost); 05468 05469 if(!This->uri || This->uri->host_start == -1 || This->modified_props & Uri_HAS_HOST) 05470 return get_builder_component(&This->host, &This->host_len, NULL, 0, ppwzHost, pcchHost); 05471 else { 05472 if(This->uri->host_type == Uri_HOST_IPV6) 05473 /* Don't include the '[' and ']' around the address. 
*/ 05474 return get_builder_component(&This->host, &This->host_len, This->uri->canon_uri+This->uri->host_start+1, 05475 This->uri->host_len-2, ppwzHost, pcchHost); 05476 else 05477 return get_builder_component(&This->host, &This->host_len, This->uri->canon_uri+This->uri->host_start, 05478 This->uri->host_len, ppwzHost, pcchHost); 05479 } 05480 } 05481 05482 /* Password getter. A userinfo_split of -1 is treated as "the base Uri has no password". */ static HRESULT WINAPI UriBuilder_GetPassword(IUriBuilder *iface, DWORD *pcchPassword, LPCWSTR *ppwzPassword) 05483 { 05484 UriBuilder *This = impl_from_IUriBuilder(iface); 05485 TRACE("(%p)->(%p %p)\n", This, pcchPassword, ppwzPassword); 05486 05487 if(!This->uri || This->uri->userinfo_split == -1 || This->modified_props & Uri_HAS_PASSWORD) 05488 return get_builder_component(&This->password, &This->password_len, NULL, 0, ppwzPassword, pcchPassword); 05489 else { 05490 /* The password starts one character past the split position and runs to the end of the userinfo. */ const WCHAR *start = This->uri->canon_uri+This->uri->userinfo_start+This->uri->userinfo_split+1; 05491 DWORD len = This->uri->userinfo_len-This->uri->userinfo_split-1; 05492 return get_builder_component(&This->password, &This->password_len, start, len, ppwzPassword, pcchPassword); 05493 } 05494 } 05495 05496 /* Path getter; offers the base Uri's path slice unless the path is absent or was modified on the builder. */ static HRESULT WINAPI UriBuilder_GetPath(IUriBuilder *iface, DWORD *pcchPath, LPCWSTR *ppwzPath) 05497 { 05498 UriBuilder *This = impl_from_IUriBuilder(iface); 05499 TRACE("(%p)->(%p %p)\n", This, pcchPath, ppwzPath); 05500 05501 if(!This->uri || This->uri->path_start == -1 || This->modified_props & Uri_HAS_PATH) 05502 return get_builder_component(&This->path, &This->path_len, NULL, 0, ppwzPath, pcchPath); 05503 else 05504 return get_builder_component(&This->path, &This->path_len, This->uri->canon_uri+This->uri->path_start, 05505 This->uri->path_len, ppwzPath, pcchPath); 05506 } 05507 05508 /* Port getter. Both out pointers are required; when only one is missing, the other still receives a defined value before E_POINTER is returned. */ static HRESULT WINAPI UriBuilder_GetPort(IUriBuilder *iface, BOOL *pfHasPort, DWORD *pdwPort) 05509 { 05510 UriBuilder *This = impl_from_IUriBuilder(iface); 05511 TRACE("(%p)->(%p %p)\n", This, pfHasPort, pdwPort); 05512 05513 if(!pfHasPort) { 05514 
if(pdwPort) 05515 *pdwPort = 0; 05516 return E_POINTER; 05517 } 05518 05519 if(!pdwPort) { 05520 *pfHasPort = FALSE; 05521 return E_POINTER; 05522 } 05523 05524 /* The values come from the builder's own fields, not from the base Uri. */ *pfHasPort = This->has_port; 05525 *pdwPort = This->port; 05526 return S_OK; 05527 } 05528 05529 /* Query getter; offers the base Uri's query slice unless the query is absent or was modified on the builder. */ static HRESULT WINAPI UriBuilder_GetQuery(IUriBuilder *iface, DWORD *pcchQuery, LPCWSTR *ppwzQuery) 05530 { 05531 UriBuilder *This = impl_from_IUriBuilder(iface); 05532 TRACE("(%p)->(%p %p)\n", This, pcchQuery, ppwzQuery); 05533 05534 if(!This->uri || This->uri->query_start == -1 || This->modified_props & Uri_HAS_QUERY) 05535 return get_builder_component(&This->query, &This->query_len, NULL, 0, ppwzQuery, pcchQuery); 05536 else 05537 return get_builder_component(&This->query, &This->query_len, This->uri->canon_uri+This->uri->query_start, 05538 This->uri->query_len, ppwzQuery, pcchQuery); 05539 } 05540 05541 /* Scheme name getter; offers the base Uri's scheme slice unless the scheme is absent or was modified on the builder. */ static HRESULT WINAPI UriBuilder_GetSchemeName(IUriBuilder *iface, DWORD *pcchSchemeName, LPCWSTR *ppwzSchemeName) 05542 { 05543 UriBuilder *This = impl_from_IUriBuilder(iface); 05544 TRACE("(%p)->(%p %p)\n", This, pcchSchemeName, ppwzSchemeName); 05545 05546 if(!This->uri || This->uri->scheme_start == -1 || This->modified_props & Uri_HAS_SCHEME_NAME) 05547 return get_builder_component(&This->scheme, &This->scheme_len, NULL, 0, ppwzSchemeName, pcchSchemeName); 05548 else 05549 return get_builder_component(&This->scheme, &This->scheme_len, This->uri->canon_uri+This->uri->scheme_start, 05550 This->uri->scheme_len, ppwzSchemeName, pcchSchemeName); 05551 } 05552 05553 /* User name getter. A userinfo_split of 0 puts the split at the very start of the userinfo, so there is no user name in front of it and no source string is passed. */ static HRESULT WINAPI UriBuilder_GetUserName(IUriBuilder *iface, DWORD *pcchUserName, LPCWSTR *ppwzUserName) 05554 { 05555 UriBuilder *This = impl_from_IUriBuilder(iface); 05556 TRACE("(%p)->(%p %p)\n", This, pcchUserName, ppwzUserName); 05557 05558 if(!This->uri || This->uri->userinfo_start == -1 || This->uri->userinfo_split == 0 || 05559 This->modified_props & Uri_HAS_USER_NAME) 05560 return get_builder_component(&This->username, 
&This->username_len, NULL, 0, ppwzUserName, pcchUserName); 05561 else { 05562 /* The user name starts at the beginning of the userinfo. */ const WCHAR *start = This->uri->canon_uri+This->uri->userinfo_start; 05563 05564 /* Check if there's a password in the userinfo section. */ 05565 if(This->uri->userinfo_split > -1) 05566 /* Don't include the password. */ 05567 return get_builder_component(&This->username, &This->username_len, start, 05568 This->uri->userinfo_split, ppwzUserName, pcchUserName); 05569 else 05570 return get_builder_component(&This->username, &This->username_len, start, 05571 This->uri->userinfo_len, ppwzUserName, pcchUserName); 05572 } 05573 } 05574 05575 /* Fragment setter. Delegates to set_builder_component() with '#' as the prefix argument (presumably the delimiter expected in front of a fragment; confirm against that helper) and Uri_HAS_FRAGMENT as the property flag. */ static HRESULT WINAPI UriBuilder_SetFragment(IUriBuilder *iface, LPCWSTR pwzNewValue) 05576 { 05577 UriBuilder *This = impl_from_IUriBuilder(iface); 05578 TRACE("(%p)->(%s)\n", This, debugstr_w(pwzNewValue)); 05579 return set_builder_component(&This->fragment, &This->fragment_len, pwzNewValue, '#', 05580 &This->modified_props, Uri_HAS_FRAGMENT); 05581 } 05582 05583 /* Host setter. Unlike most of the other string setters it refuses NULL with E_INVALIDARG. */ static HRESULT WINAPI UriBuilder_SetHost(IUriBuilder *iface, LPCWSTR pwzNewValue) 05584 { 05585 UriBuilder *This = impl_from_IUriBuilder(iface); 05586 TRACE("(%p)->(%s)\n", This, debugstr_w(pwzNewValue)); 05587 05588 /* Host name can't be set to NULL. 
*/ 05589 if(!pwzNewValue) 05590 return E_INVALIDARG; 05591 05592 return set_builder_component(&This->host, &This->host_len, pwzNewValue, 0, 05593 &This->modified_props, Uri_HAS_HOST); 05594 } 05595 05596 /* Password setter; NULL is passed through to set_builder_component() unchecked. */ static HRESULT WINAPI UriBuilder_SetPassword(IUriBuilder *iface, LPCWSTR pwzNewValue) 05597 { 05598 UriBuilder *This = impl_from_IUriBuilder(iface); 05599 TRACE("(%p)->(%s)\n", This, debugstr_w(pwzNewValue)); 05600 return set_builder_component(&This->password, &This->password_len, pwzNewValue, 0, 05601 &This->modified_props, Uri_HAS_PASSWORD); 05602 } 05603 05604 /* Path setter; NULL is passed through to set_builder_component() unchecked. */ static HRESULT WINAPI UriBuilder_SetPath(IUriBuilder *iface, LPCWSTR pwzNewValue) 05605 { 05606 UriBuilder *This = impl_from_IUriBuilder(iface); 05607 TRACE("(%p)->(%s)\n", This, debugstr_w(pwzNewValue)); 05608 return set_builder_component(&This->path, &This->path_len, pwzNewValue, 0, 05609 &This->modified_props, Uri_HAS_PATH); 05610 } 05611 05612 /* Port setter. Stores both values as given, without any range check, and marks the port as modified. Always returns S_OK. */ static HRESULT WINAPI UriBuilder_SetPort(IUriBuilder *iface, BOOL fHasPort, DWORD dwNewValue) 05613 { 05614 UriBuilder *This = impl_from_IUriBuilder(iface); 05615 TRACE("(%p)->(%d %d)\n", This, fHasPort, dwNewValue); 05616 05617 This->has_port = fHasPort; 05618 This->port = dwNewValue; 05619 This->modified_props |= Uri_HAS_PORT; 05620 return S_OK; 05621 } 05622 05623 /* Query setter. Like the fragment setter it supplies a prefix character ('?') to set_builder_component(). */ static HRESULT WINAPI UriBuilder_SetQuery(IUriBuilder *iface, LPCWSTR pwzNewValue) 05624 { 05625 UriBuilder *This = impl_from_IUriBuilder(iface); 05626 TRACE("(%p)->(%s)\n", This, debugstr_w(pwzNewValue)); 05627 return set_builder_component(&This->query, &This->query_len, pwzNewValue, '?', 05628 &This->modified_props, Uri_HAS_QUERY); 05629 } 05630 05631 /* Scheme name setter; rejects both NULL and the empty string with E_INVALIDARG. */ static HRESULT WINAPI UriBuilder_SetSchemeName(IUriBuilder *iface, LPCWSTR pwzNewValue) 05632 { 05633 UriBuilder *This = impl_from_IUriBuilder(iface); 05634 TRACE("(%p)->(%s)\n", This, debugstr_w(pwzNewValue)); 05635 05636 /* Only set the scheme name if it's not NULL or empty. 
*/ 05637 if(!pwzNewValue || !*pwzNewValue) 05638 return E_INVALIDARG; 05639 05640 return set_builder_component(&This->scheme, &This->scheme_len, pwzNewValue, 0, 05641 &This->modified_props, Uri_HAS_SCHEME_NAME); 05642 } 05643 05644 /* User name setter; NULL is passed through to set_builder_component() unchecked. */ static HRESULT WINAPI UriBuilder_SetUserName(IUriBuilder *iface, LPCWSTR pwzNewValue) 05645 { 05646 UriBuilder *This = impl_from_IUriBuilder(iface); 05647 TRACE("(%p)->(%s)\n", This, debugstr_w(pwzNewValue)); 05648 return set_builder_component(&This->username, &This->username_len, pwzNewValue, 0, 05649 &This->modified_props, Uri_HAS_USER_NAME); 05650 } 05651 05652 /* Removes the properties selected by dwPropertyMask by setting each of them to NULL. Any bit outside accepted_flags makes the whole call fail with E_INVALIDARG before anything is changed. */ static HRESULT WINAPI UriBuilder_RemoveProperties(IUriBuilder *iface, DWORD dwPropertyMask) 05653 { 05654 const DWORD accepted_flags = Uri_HAS_AUTHORITY|Uri_HAS_DOMAIN|Uri_HAS_EXTENSION|Uri_HAS_FRAGMENT|Uri_HAS_HOST| 05655 Uri_HAS_PASSWORD|Uri_HAS_PATH|Uri_HAS_PATH_AND_QUERY|Uri_HAS_QUERY| 05656 Uri_HAS_USER_INFO|Uri_HAS_USER_NAME; 05657 05658 UriBuilder *This = impl_from_IUriBuilder(iface); 05659 TRACE("(%p)->(0x%08x)\n", This, dwPropertyMask); 05660 05661 if(dwPropertyMask & ~accepted_flags) 05662 return E_INVALIDARG; 05663 05664 if(dwPropertyMask & Uri_HAS_FRAGMENT) 05665 UriBuilder_SetFragment(iface, NULL); 05666 05667 /* Even though you can't set the host name to NULL or an 05668 * empty string, you can still remove it... for some reason. 
05669 */ 05670 if(dwPropertyMask & Uri_HAS_HOST) 05671 set_builder_component(&This->host, &This->host_len, NULL, 0, 05672 &This->modified_props, Uri_HAS_HOST); 05673 05674 if(dwPropertyMask & Uri_HAS_PASSWORD) 05675 UriBuilder_SetPassword(iface, NULL); 05676 05677 if(dwPropertyMask & Uri_HAS_PATH) 05678 UriBuilder_SetPath(iface, NULL); 05679 05680 /* NOTE(review): Uri_HAS_PORT is not part of accepted_flags in this function, so this branch looks unreachable unless the flag shares a bit with an accepted one; confirm the intended behaviour. */ if(dwPropertyMask & Uri_HAS_PORT) 05681 UriBuilder_SetPort(iface, FALSE, 0); 05682 05683 if(dwPropertyMask & Uri_HAS_QUERY) 05684 UriBuilder_SetQuery(iface, NULL); 05685 05686 if(dwPropertyMask & Uri_HAS_USER_NAME) 05687 UriBuilder_SetUserName(iface, NULL); 05688 05689 return S_OK; 05690 } 05691 05692 /* Stores TRUE in *pfModified when modified_props is greater than zero; E_POINTER for a NULL out pointer. */ static HRESULT WINAPI UriBuilder_HasBeenModified(IUriBuilder *iface, BOOL *pfModified) 05693 { 05694 UriBuilder *This = impl_from_IUriBuilder(iface); 05695 TRACE("(%p)->(%p)\n", This, pfModified); 05696 05697 if(!pfModified) 05698 return E_POINTER; 05699 05700 *pfModified = This->modified_props > 0; 05701 return S_OK; 05702 } 05703 05704 /* Function table installed into every UriBuilder by CreateIUriBuilder. */ static const IUriBuilderVtbl UriBuilderVtbl = { 05705 UriBuilder_QueryInterface, 05706 UriBuilder_AddRef, 05707 UriBuilder_Release, 05708 UriBuilder_CreateUriSimple, 05709 UriBuilder_CreateUri, 05710 UriBuilder_CreateUriWithFlags, 05711 UriBuilder_GetIUri, 05712 UriBuilder_SetIUri, 05713 UriBuilder_GetFragment, 05714 UriBuilder_GetHost, 05715 UriBuilder_GetPassword, 05716 UriBuilder_GetPath, 05717 UriBuilder_GetPort, 05718 UriBuilder_GetQuery, 05719 UriBuilder_GetSchemeName, 05720 UriBuilder_GetUserName, 05721 UriBuilder_SetFragment, 05722 UriBuilder_SetHost, 05723 UriBuilder_SetPassword, 05724 UriBuilder_SetPath, 05725 UriBuilder_SetPort, 05726 UriBuilder_SetQuery, 05727 UriBuilder_SetSchemeName, 05728 UriBuilder_SetUserName, 05729 UriBuilder_RemoveProperties, 05730 UriBuilder_HasBeenModified, 05731 }; 05732 05733 /*********************************************************************** 05734 * CreateIUriBuilder (urlmon.@) 05735 */ 05736 HRESULT WINAPI CreateIUriBuilder(IUri *pIUri, DWORD 
dwFlags, DWORD_PTR dwReserved, IUriBuilder **ppIUriBuilder) 05737 { 05738 UriBuilder *ret; 05739 05740 TRACE("(%p %x %x %p)\n", pIUri, dwFlags, (DWORD)dwReserved, ppIUriBuilder); 05741 05742 if(!ppIUriBuilder) 05743 return E_POINTER; 05744 05745 ret = heap_alloc_zero(sizeof(UriBuilder)); 05746 if(!ret) { /* Clear the out parameter here as well, matching the E_NOTIMPL failure path below. */ *ppIUriBuilder = NULL; 05747 return E_OUTOFMEMORY; } 05748 05749 ret->IUriBuilder_iface.lpVtbl = &UriBuilderVtbl; 05750 ret->ref = 1; 05751 05752 /* An optional base IUri seeds the builder; only IUri objects that get_uri_obj() recognises are supported. */ if(pIUri) { 05753 Uri *uri; 05754 05755 if((uri = get_uri_obj(pIUri))) { 05756 IUri_AddRef(pIUri); 05757 ret->uri = uri; 05758 05759 if(uri->has_port) 05760 /* Windows doesn't set 'has_port' to TRUE in this case. */ 05761 ret->port = uri->port; 05762 05763 } else { 05764 heap_free(ret); 05765 *ppIUriBuilder = NULL; 05766 FIXME("(%p %x %x %p): Unknown IUri types not supported yet.\n", pIUri, dwFlags, 05767 (DWORD)dwReserved, ppIUriBuilder); 05768 return E_NOTIMPL; 05769 } 05770 } 05771 05772 *ppIUriBuilder = &ret->IUriBuilder_iface; 05773 return S_OK; 05774 } 05775 05776 /* Merges the base path with the relative path and stores the resulting path 05777 * and path len in 'result' and 'result_len'. The result is heap allocated and NUL terminated; on allocation failure E_OUTOFMEMORY is returned and *result_len is set to 0. 05778 */ 05779 static HRESULT merge_paths(parse_data *data, const WCHAR *base, DWORD base_len, const WCHAR *relative, 05780 DWORD relative_len, WCHAR **result, DWORD *result_len, DWORD flags) 05781 { 05782 const WCHAR *end = NULL; 05783 DWORD base_copy_len = 0; 05784 WCHAR *ptr; 05785 05786 if(base_len) { 05787 /* Find the characters that will be copied over from 05788 * the base path. 05789 */ 05790 end = memrchrW(base, '/', base_len); 05791 if(!end && data->scheme_type == URL_SCHEME_FILE) 05792 /* Try looking for a '\\'. 
*/ 05793 end = memrchrW(base, '\\', base_len); 05794 } 05795 05796 /* When a separator was found, everything up to and including it is kept from the base path. */ if(end) { 05797 base_copy_len = (end+1)-base; 05798 *result = heap_alloc((base_copy_len+relative_len+1)*sizeof(WCHAR)); 05799 } else 05800 *result = heap_alloc((relative_len+1)*sizeof(WCHAR)); 05801 05802 if(!(*result)) { 05803 *result_len = 0; 05804 return E_OUTOFMEMORY; 05805 } 05806 05807 ptr = *result; 05808 if(end) { 05809 memcpy(ptr, base, base_copy_len*sizeof(WCHAR)); 05810 ptr += base_copy_len; 05811 } 05812 05813 /* The relative path is appended in full and the result is NUL terminated. */ memcpy(ptr, relative, relative_len*sizeof(WCHAR)); 05814 ptr += relative_len; 05815 *ptr = '\0'; 05816 05817 /* The reported length excludes the terminator. */ *result_len = (ptr-*result); 05818 return S_OK; 05819 } 05820 05821 /* Combines 'relative' with 'base' into a newly created Uri returned through 'result'. 'flags' carries URL_* combine flags; 'extras' carries COMBINE_URI_* bits. */ static HRESULT combine_uri(Uri *base, Uri *relative, DWORD flags, IUri **result, DWORD extras) { 05822 Uri *ret; 05823 HRESULT hr; 05824 parse_data data; 05825 DWORD create_flags = 0, len = 0; 05826 05827 memset(&data, 0, sizeof(parse_data)); 05828 05829 /* Base case is when the relative Uri has a scheme name, 05830 * if it does, then 'result' will contain the same data 05831 * as the relative Uri. 
05832 */ 05833 if(relative->scheme_start > -1) { 05834 /* Work on a private copy of the relative Uri's raw string; ownership moves to 'ret' further down. */ data.uri = SysAllocString(relative->raw_uri); 05835 if(!data.uri) { 05836 *result = NULL; 05837 return E_OUTOFMEMORY; 05838 } 05839 05840 parse_uri(&data, 0); 05841 05842 ret = create_uri_obj(); 05843 if(!ret) { /* Nothing owns the copied string yet, so it has to be freed here to avoid a leak. */ SysFreeString(data.uri); 05844 *result = NULL; 05845 return E_OUTOFMEMORY; 05846 } 05847 05848 /* The combine flags only influence creation when the caller asked for it through 'extras'. */ if(extras & COMBINE_URI_FORCE_FLAG_USE) { 05849 if(flags & URL_DONT_SIMPLIFY) 05850 create_flags |= Uri_CREATE_NO_CANONICALIZE; 05851 if(flags & URL_DONT_UNESCAPE_EXTRA_INFO) 05852 create_flags |= Uri_CREATE_NO_DECODE_EXTRA_INFO; 05853 } 05854 05855 /* From here on 'ret' owns the string. */ ret->raw_uri = data.uri; 05856 hr = canonicalize_uri(&data, ret, create_flags); 05857 if(FAILED(hr)) { 05858 IUri_Release(&ret->IUri_iface); 05859 *result = NULL; 05860 return hr; 05861 } 05862 05863 apply_default_flags(&create_flags); 05864 ret->create_flags = create_flags; 05865 05866 *result = &ret->IUri_iface; 05867 } else { 05868 WCHAR *path = NULL; 05869 DWORD raw_flags = 0; 05870 05871 /* The relative Uri has no scheme: take the scheme from the base Uri, or mark the result as relative when the base has none either. */ if(base->scheme_start > -1) { 05872 data.scheme = base->canon_uri+base->scheme_start; 05873 data.scheme_len = base->scheme_len; 05874 data.scheme_type = base->scheme_type; 05875 } else { 05876 data.is_relative = TRUE; 05877 data.scheme_type = URL_SCHEME_UNKNOWN; 05878 create_flags |= Uri_CREATE_ALLOW_RELATIVE; 05879 } 05880 05881 /* The authority (user info, host, port) is always taken from the base Uri. */ if(base->authority_start > -1) { 05882 if(base->userinfo_start > -1 && base->userinfo_split != 0) { 05883 data.username = base->canon_uri+base->userinfo_start; 05884 data.username_len = (base->userinfo_split > -1) ? 
base->userinfo_split : base->userinfo_len; 05885 } 05886 05887 /* A split position of -1 means the base userinfo carries no password part. */ if(base->userinfo_split > -1) { 05888 data.password = base->canon_uri+base->userinfo_start+base->userinfo_split+1; 05889 data.password_len = base->userinfo_len-base->userinfo_split-1; 05890 } 05891 05892 if(base->host_start > -1) { 05893 data.host = base->canon_uri+base->host_start; 05894 data.host_len = base->host_len; 05895 data.host_type = base->host_type; 05896 } 05897 05898 if(base->has_port) { 05899 data.has_port = TRUE; 05900 data.port_value = base->port; 05901 } 05902 } else if(base->scheme_type != URL_SCHEME_FILE) 05903 /* A base without an authority is treated as opaque unless it is a file URI. */ data.is_opaque = TRUE; 05904 05905 /* Relative Uri without a path: keep the base path, and prefer the relative query over the base query. */ if(relative->path_start == -1 || !relative->path_len) { 05906 if(base->path_start > -1) { 05907 data.path = base->canon_uri+base->path_start; 05908 data.path_len = base->path_len; 05909 } else if((base->path_start == -1 || !base->path_len) && !data.is_opaque) { 05910 /* Just set the path as a '/' if the base didn't have 05911 * one and if it's a hierarchical URI. 05912 */ 05913 static const WCHAR slashW[] = {'/',0}; 05914 data.path = slashW; 05915 data.path_len = 1; 05916 } 05917 05918 if(relative->query_start > -1) { 05919 data.query = relative->canon_uri+relative->query_start; 05920 data.query_len = relative->query_len; 05921 } else if(base->query_start > -1) { 05922 data.query = base->canon_uri+base->query_start; 05923 data.query_len = base->query_len; 05924 } 05925 } else { 05926 const WCHAR *ptr, **pptr; 05927 DWORD path_offset = 0, path_len = 0; 05928 05929 /* There's two possibilities on what will happen to the path component 05930 * of the result IUri. First, if the relative path begins with a '/' 05931 * then the resulting path will just be the relative path. Second, if 05932 * relative path doesn't begin with a '/' then the base path and relative 05933 * path are merged together. 
05934 */ 05935 if(relative->path_len && *(relative->canon_uri+relative->path_start) == '/') { 05936 WCHAR *tmp = NULL; 05937 BOOL copy_drive_path = FALSE; 05938 05939 /* If the relative IUri's path starts with a '/', then we 05940 * don't use the base IUri's path. Unless the base IUri 05941 * is a file URI, in which case it uses the drive path of 05942 * the base IUri (if it has any) in the new path. 05943 */ 05944 if(base->scheme_type == URL_SCHEME_FILE) { 05945 if(base->path_len > 3 && *(base->canon_uri+base->path_start) == '/' && 05946 is_drive_path(base->canon_uri+base->path_start+1)) { 05947 /* Reserve room for the 3 WCHARs copied from the base path below. */ path_len += 3; 05948 copy_drive_path = TRUE; 05949 } 05950 } 05951 05952 path_len += relative->path_len; 05953 05954 path = heap_alloc((path_len+1)*sizeof(WCHAR)); 05955 if(!path) { 05956 *result = NULL; 05957 return E_OUTOFMEMORY; 05958 } 05959 05960 tmp = path; 05961 05962 /* Copy the first 3 WCHARs of the base path (the '/' plus the part is_drive_path() accepted) over. */ 05963 if(copy_drive_path) { 05964 memcpy(tmp, base->canon_uri+base->path_start, 3*sizeof(WCHAR)); 05965 tmp += 3; 05966 } 05967 05968 memcpy(tmp, relative->canon_uri+relative->path_start, relative->path_len*sizeof(WCHAR)); 05969 path[path_len] = '\0'; 05970 } else { 05971 /* Merge the base path with the relative path. */ 05972 hr = merge_paths(&data, base->canon_uri+base->path_start, base->path_len, 05973 relative->canon_uri+relative->path_start, relative->path_len, 05974 &path, &path_len, flags); 05975 if(FAILED(hr)) { 05976 *result = NULL; 05977 return hr; 05978 } 05979 05980 /* If the resulting IUri is a file URI, the drive path isn't 05981 * reduced out when the dot segments are removed. 05982 */ 05983 if(path_len >= 3 && data.scheme_type == URL_SCHEME_FILE && !data.host) { 05984 /* path_offset records how far into the path the drive specification starts being protected: 2 with a leading '/', 1 without. */ if(*path == '/' && is_drive_path(path+1)) 05985 path_offset = 2; 05986 else if(is_drive_path(path)) 05987 path_offset = 1; 05988 } 05989 } 05990 05991 /* Check if the dot segments need to be removed from the path. 
*/ 05992 if(!(flags & URL_DONT_SIMPLIFY) && !data.is_opaque) { 05993 /* Dot-segment removal starts after the protected prefix, if any. */ DWORD offset = (path_offset > 0) ? path_offset+1 : 0; 05994 DWORD new_len = remove_dot_segments(path+offset,path_len-offset); 05995 05996 /* NOTE(review): new_len excludes 'offset' while path_len includes it, so with a non-zero offset this test is true even when nothing was removed; that only costs a redundant realloc, but confirm it is intended. */ if(new_len != path_len) { 05997 WCHAR *tmp = heap_realloc(path, (offset+new_len+1)*sizeof(WCHAR)); 05998 if(!tmp) { 05999 heap_free(path); 06000 *result = NULL; 06001 return E_OUTOFMEMORY; 06002 } 06003 06004 tmp[new_len+offset] = '\0'; 06005 path = tmp; 06006 path_len = new_len+offset; 06007 } 06008 } 06009 06010 /* In this branch only the relative Uri's query is used; the base query is not a fallback. */ if(relative->query_start > -1) { 06011 data.query = relative->canon_uri+relative->query_start; 06012 data.query_len = relative->query_len; 06013 } 06014 06015 /* Make sure the path component is valid. */ 06016 ptr = path; 06017 pptr = &ptr; 06018 if((data.is_opaque && !parse_path_opaque(pptr, &data, 0)) || 06019 (!data.is_opaque && !parse_path_hierarchical(pptr, &data, 0))) { 06020 heap_free(path); 06021 *result = NULL; 06022 return E_INVALIDARG; 06023 } 06024 } 06025 06026 /* The fragment always comes from the relative Uri. */ if(relative->fragment_start > -1) { 06027 data.fragment = relative->canon_uri+relative->fragment_start; 06028 data.fragment_len = relative->fragment_len; 06029 } 06030 06031 if(flags & URL_DONT_SIMPLIFY) 06032 raw_flags |= RAW_URI_FORCE_PORT_DISP; 06033 if(flags & URL_FILE_USE_PATHURL) 06034 raw_flags |= RAW_URI_CONVERT_TO_DOS_PATH; 06035 06036 /* generate_raw_uri() is called twice: the first call's return value sizes the BSTR, the second call is given the allocated buffer. */ len = generate_raw_uri(&data, data.uri, raw_flags); 06037 data.uri = SysAllocStringLen(NULL, len); 06038 if(!data.uri) { 06039 heap_free(path); 06040 *result = NULL; 06041 return E_OUTOFMEMORY; 06042 } 06043 06044 generate_raw_uri(&data, data.uri, raw_flags); 06045 06046 ret = create_uri_obj(); 06047 if(!ret) { 06048 SysFreeString(data.uri); 06049 heap_free(path); 06050 *result = NULL; 06051 return E_OUTOFMEMORY; 06052 } 06053 06054 /* Translate the combine flags into the matching Uri creation flags. */ if(flags & URL_DONT_SIMPLIFY) 06055 create_flags |= Uri_CREATE_NO_CANONICALIZE; 06056 if(flags & URL_FILE_USE_PATHURL) 06057 create_flags |= Uri_CREATE_FILE_USE_DOS_PATH; 06058 06059 ret->raw_uri = data.uri; 06060 
hr = canonicalize_uri(&data, ret, create_flags); 06061 if(FAILED(hr)) { 06062 IUri_Release(&ret->IUri_iface); /* The temporary path buffer is freed on the success path below; free it on this failure path too so it does not leak. */ heap_free(path); 06063 *result = NULL; 06064 return hr; 06065 } 06066 06067 if(flags & URL_DONT_SIMPLIFY) 06068 ret->display_modifiers |= URI_DISPLAY_NO_DEFAULT_PORT_AUTH; 06069 06070 apply_default_flags(&create_flags); 06071 ret->create_flags = create_flags; 06072 *result = &ret->IUri_iface; 06073 06074 /* The path buffer was only needed while building the new Uri. */ heap_free(path); 06075 } 06076 06077 return S_OK; 06078 } 06079 06080 /*********************************************************************** 06081 * CoInternetCombineIUri (urlmon.@) 06082 * 06082 * Combines pRelativeUri with pBaseUri. A protocol handler registered for the base URI gets the first chance to combine; otherwise, or if its result cannot be turned into an IUri, combine_uri() is used. Returns E_INVALIDARG for NULL arguments and E_NOTIMPL for IUri objects that get_uri_obj() does not recognise. 06082 */ 06083 HRESULT WINAPI CoInternetCombineIUri(IUri *pBaseUri, IUri *pRelativeUri, DWORD dwCombineFlags, 06084 IUri **ppCombinedUri, DWORD_PTR dwReserved) 06085 { 06086 HRESULT hr; 06087 IInternetProtocolInfo *info; 06088 Uri *relative, *base; 06089 TRACE("(%p %p %x %p %x)\n", pBaseUri, pRelativeUri, dwCombineFlags, ppCombinedUri, (DWORD)dwReserved); 06090 06091 if(!ppCombinedUri) 06092 return E_INVALIDARG; 06093 06094 if(!pBaseUri || !pRelativeUri) { 06095 *ppCombinedUri = NULL; 06096 return E_INVALIDARG; 06097 } 06098 06099 relative = get_uri_obj(pRelativeUri); 06100 base = get_uri_obj(pBaseUri); 06101 if(!relative || !base) { 06102 *ppCombinedUri = NULL; 06103 FIXME("(%p %p %x %p %x) Unknown IUri types not supported yet.\n", 06104 pBaseUri, pRelativeUri, dwCombineFlags, ppCombinedUri, (DWORD)dwReserved); 06105 return E_NOTIMPL; 06106 } 06107 06108 info = get_protocol_info(base->canon_uri); 06109 if(info) { 06110 WCHAR result[INTERNET_MAX_URL_LENGTH+1]; 06111 DWORD result_len = 0; 06112 06113 hr = IInternetProtocolInfo_CombineUrl(info, base->canon_uri, relative->canon_uri, dwCombineFlags, 06114 result, INTERNET_MAX_URL_LENGTH+1, &result_len, 0); 06115 IInternetProtocolInfo_Release(info); 06116 /* A failure of either step falls through to the built-in combine below. */ if(SUCCEEDED(hr)) { 06117 hr = CreateUri(result, Uri_CREATE_ALLOW_RELATIVE, 0, ppCombinedUri); 06118 if(SUCCEEDED(hr)) 06119 return hr; 06120 } 06121 } 06122 06123 return combine_uri(base, 
relative, dwCombineFlags, ppCombinedUri, 0); 06124 } 06125 06126 /*********************************************************************** 06127 * CoInternetCombineUrlEx (urlmon.@) 06128 */ 06129 HRESULT WINAPI CoInternetCombineUrlEx(IUri *pBaseUri, LPCWSTR pwzRelativeUrl, DWORD dwCombineFlags, 06130 IUri **ppCombinedUri, DWORD_PTR dwReserved) 06131 { 06132 IUri *relative; 06133 Uri *base; 06134 HRESULT hr; 06135 IInternetProtocolInfo *info; 06136 06137 TRACE("(%p %s %x %p %x) stub\n", pBaseUri, debugstr_w(pwzRelativeUrl), dwCombineFlags, 06138 ppCombinedUri, (DWORD)dwReserved); 06139 06140 if(!ppCombinedUri) 06141 return E_POINTER; 06142 06143 if(!pwzRelativeUrl) { 06144 *ppCombinedUri = NULL; 06145 return E_UNEXPECTED; 06146 } 06147 06148 if(!pBaseUri) { 06149 *ppCombinedUri = NULL; 06150 return E_INVALIDARG; 06151 } 06152 06153 base = get_uri_obj(pBaseUri); 06154 if(!base) { 06155 *ppCombinedUri = NULL; 06156 FIXME("(%p %s %x %p %x) Unknown IUri's not supported yet.\n", pBaseUri, debugstr_w(pwzRelativeUrl), 06157 dwCombineFlags, ppCombinedUri, (DWORD)dwReserved); 06158 return E_NOTIMPL; 06159 } 06160 06161 info = get_protocol_info(base->canon_uri); 06162 if(info) { 06163 WCHAR result[INTERNET_MAX_URL_LENGTH+1]; 06164 DWORD result_len = 0; 06165 06166 hr = IInternetProtocolInfo_CombineUrl(info, base->canon_uri, pwzRelativeUrl, dwCombineFlags, 06167 result, INTERNET_MAX_URL_LENGTH+1, &result_len, 0); 06168 IInternetProtocolInfo_Release(info); 06169 if(SUCCEEDED(hr)) { 06170 hr = CreateUri(result, Uri_CREATE_ALLOW_RELATIVE, 0, ppCombinedUri); 06171 if(SUCCEEDED(hr)) 06172 return hr; 06173 } 06174 } 06175 06176 hr = CreateUri(pwzRelativeUrl, Uri_CREATE_ALLOW_RELATIVE, 0, &relative); 06177 if(FAILED(hr)) { 06178 *ppCombinedUri = NULL; 06179 return hr; 06180 } 06181 06182 hr = combine_uri(base, get_uri_obj(relative), dwCombineFlags, ppCombinedUri, COMBINE_URI_FORCE_FLAG_USE); 06183 06184 IUri_Release(relative); 06185 return hr; 06186 } 06187 06188 static HRESULT 
parse_canonicalize(const Uri *uri, DWORD flags, LPWSTR output, 06189 DWORD output_len, DWORD *result_len) 06190 { 06191 const WCHAR *ptr = NULL; 06192 WCHAR *path = NULL; 06193 const WCHAR **pptr; 06194 WCHAR buffer[INTERNET_MAX_URL_LENGTH+1]; 06195 DWORD len = 0; 06196 BOOL reduce_path; 06197 06198 /* URL_UNESCAPE only has effect if none of the URL_ESCAPE flags are set. */ 06199 const BOOL allow_unescape = !(flags & URL_ESCAPE_UNSAFE) && 06200 !(flags & URL_ESCAPE_SPACES_ONLY) && 06201 !(flags & URL_ESCAPE_PERCENT); 06202 06203 06204 /* Check if the dot segments need to be removed from the 06205 * path component. 06206 */ 06207 if(uri->scheme_start > -1 && uri->path_start > -1) { 06208 ptr = uri->canon_uri+uri->scheme_start+uri->scheme_len+1; 06209 pptr = &ptr; 06210 } 06211 reduce_path = !(flags & URL_NO_META) && 06212 !(flags & URL_DONT_SIMPLIFY) && 06213 ptr && check_hierarchical(pptr); 06214 06215 for(ptr = uri->canon_uri; ptr < uri->canon_uri+uri->canon_len; ++ptr) { 06216 BOOL do_default_action = TRUE; 06217 06218 /* Keep track of the path if we need to remove dot segments from 06219 * it later. 06220 */ 06221 if(reduce_path && !path && ptr == uri->canon_uri+uri->path_start) 06222 path = buffer+len; 06223 06224 /* Check if it's time to reduce the path. */ 06225 if(reduce_path && ptr == uri->canon_uri+uri->path_start+uri->path_len) { 06226 DWORD current_path_len = (buffer+len) - path; 06227 DWORD new_path_len = remove_dot_segments(path, current_path_len); 06228 06229 /* Update the current length. */ 06230 len -= (current_path_len-new_path_len); 06231 reduce_path = FALSE; 06232 } 06233 06234 if(*ptr == '%') { 06235 const WCHAR decoded = decode_pct_val(ptr); 06236 if(decoded) { 06237 if(allow_unescape && (flags & URL_UNESCAPE)) { 06238 buffer[len++] = decoded; 06239 ptr += 2; 06240 do_default_action = FALSE; 06241 } 06242 } 06243 06244 /* See if %'s needed to encoded. 
*/ 06245 if(do_default_action && (flags & URL_ESCAPE_PERCENT)) { 06246 pct_encode_val(*ptr, buffer+len); 06247 len += 3; 06248 do_default_action = FALSE; 06249 } 06250 } else if(*ptr == ' ') { 06251 if((flags & URL_ESCAPE_SPACES_ONLY) && 06252 !(flags & URL_ESCAPE_UNSAFE)) { 06253 pct_encode_val(*ptr, buffer+len); 06254 len += 3; 06255 do_default_action = FALSE; 06256 } 06257 } else if(!is_reserved(*ptr) && !is_unreserved(*ptr)) { 06258 if(flags & URL_ESCAPE_UNSAFE) { 06259 pct_encode_val(*ptr, buffer+len); 06260 len += 3; 06261 do_default_action = FALSE; 06262 } 06263 } 06264 06265 if(do_default_action) 06266 buffer[len++] = *ptr; 06267 } 06268 06269 /* Sometimes the path is the very last component of the IUri, so 06270 * see if the dot segments need to be reduced now. 06271 */ 06272 if(reduce_path && path) { 06273 DWORD current_path_len = (buffer+len) - path; 06274 DWORD new_path_len = remove_dot_segments(path, current_path_len); 06275 06276 /* Update the current length. */ 06277 len -= (current_path_len-new_path_len); 06278 } 06279 06280 buffer[len++] = 0; 06281 06282 /* The null terminator isn't included the length. 
*/ 06283 *result_len = len-1; 06284 if(len > output_len) 06285 return STRSAFE_E_INSUFFICIENT_BUFFER; 06286 else 06287 memcpy(output, buffer, len*sizeof(WCHAR)); 06288 06289 return S_OK; 06290 } 06291 06292 static HRESULT parse_friendly(IUri *uri, LPWSTR output, DWORD output_len, 06293 DWORD *result_len) 06294 { 06295 HRESULT hr; 06296 DWORD display_len; 06297 BSTR display; 06298 06299 hr = IUri_GetPropertyLength(uri, Uri_PROPERTY_DISPLAY_URI, &display_len, 0); 06300 if(FAILED(hr)) { 06301 *result_len = 0; 06302 return hr; 06303 } 06304 06305 *result_len = display_len; 06306 if(display_len+1 > output_len) 06307 return STRSAFE_E_INSUFFICIENT_BUFFER; 06308 06309 hr = IUri_GetDisplayUri(uri, &display); 06310 if(FAILED(hr)) { 06311 *result_len = 0; 06312 return hr; 06313 } 06314 06315 memcpy(output, display, (display_len+1)*sizeof(WCHAR)); 06316 SysFreeString(display); 06317 return S_OK; 06318 } 06319 06320 static HRESULT parse_rootdocument(const Uri *uri, LPWSTR output, DWORD output_len, 06321 DWORD *result_len) 06322 { 06323 static const WCHAR colon_slashesW[] = {':','/','/'}; 06324 06325 WCHAR *ptr; 06326 DWORD len = 0; 06327 06328 /* Windows only returns the root document if the URI has an authority 06329 * and it's not an unknown scheme type or a file scheme type. 06330 */ 06331 if(uri->authority_start == -1 || 06332 uri->scheme_type == URL_SCHEME_UNKNOWN || 06333 uri->scheme_type == URL_SCHEME_FILE) { 06334 *result_len = 0; 06335 if(!output_len) 06336 return STRSAFE_E_INSUFFICIENT_BUFFER; 06337 06338 output[0] = 0; 06339 return S_OK; 06340 } 06341 06342 len = uri->scheme_len+uri->authority_len; 06343 /* For the "://" and '/' which will be added. */ 06344 len += 4; 06345 06346 if(len+1 > output_len) { 06347 *result_len = len; 06348 return STRSAFE_E_INSUFFICIENT_BUFFER; 06349 } 06350 06351 ptr = output; 06352 memcpy(ptr, uri->canon_uri+uri->scheme_start, uri->scheme_len*sizeof(WCHAR)); 06353 06354 /* Add the "://". 
*/ 06355 ptr += uri->scheme_len; 06356 memcpy(ptr, colon_slashesW, sizeof(colon_slashesW)); 06357 06358 /* Add the authority. */ 06359 ptr += sizeof(colon_slashesW)/sizeof(WCHAR); 06360 memcpy(ptr, uri->canon_uri+uri->authority_start, uri->authority_len*sizeof(WCHAR)); 06361 06362 /* Add the '/' after the authority. */ 06363 ptr += uri->authority_len; 06364 *ptr = '/'; 06365 ptr[1] = 0; 06366 06367 *result_len = len; 06368 return S_OK; 06369 } 06370 06371 static HRESULT parse_document(const Uri *uri, LPWSTR output, DWORD output_len, 06372 DWORD *result_len) 06373 { 06374 DWORD len = 0; 06375 06376 /* It has to be a known scheme type, but, it can't be a file 06377 * scheme. It also has to hierarchical. 06378 */ 06379 if(uri->scheme_type == URL_SCHEME_UNKNOWN || 06380 uri->scheme_type == URL_SCHEME_FILE || 06381 uri->authority_start == -1) { 06382 *result_len = 0; 06383 if(output_len < 1) 06384 return STRSAFE_E_INSUFFICIENT_BUFFER; 06385 06386 output[0] = 0; 06387 return S_OK; 06388 } 06389 06390 if(uri->fragment_start > -1) 06391 len = uri->fragment_start; 06392 else 06393 len = uri->canon_len; 06394 06395 *result_len = len; 06396 if(len+1 > output_len) 06397 return STRSAFE_E_INSUFFICIENT_BUFFER; 06398 06399 memcpy(output, uri->canon_uri, len*sizeof(WCHAR)); 06400 output[len] = 0; 06401 return S_OK; 06402 } 06403 06404 static HRESULT parse_path_from_url(const Uri *uri, LPWSTR output, DWORD output_len, 06405 DWORD *result_len) 06406 { 06407 const WCHAR *path_ptr; 06408 WCHAR buffer[INTERNET_MAX_URL_LENGTH+1]; 06409 WCHAR *ptr; 06410 06411 if(uri->scheme_type != URL_SCHEME_FILE) { 06412 *result_len = 0; 06413 if(output_len > 0) 06414 output[0] = 0; 06415 return E_INVALIDARG; 06416 } 06417 06418 ptr = buffer; 06419 if(uri->host_start > -1) { 06420 static const WCHAR slash_slashW[] = {'\\','\\'}; 06421 06422 memcpy(ptr, slash_slashW, sizeof(slash_slashW)); 06423 ptr += sizeof(slash_slashW)/sizeof(WCHAR); 06424 memcpy(ptr, uri->canon_uri+uri->host_start, 
uri->host_len*sizeof(WCHAR)); 06425 ptr += uri->host_len; 06426 } 06427 06428 path_ptr = uri->canon_uri+uri->path_start; 06429 if(uri->path_len > 3 && *path_ptr == '/' && is_drive_path(path_ptr+1)) 06430 /* Skip past the '/' in front of the drive path. */ 06431 ++path_ptr; 06432 06433 for(; path_ptr < uri->canon_uri+uri->path_start+uri->path_len; ++path_ptr, ++ptr) { 06434 BOOL do_default_action = TRUE; 06435 06436 if(*path_ptr == '%') { 06437 const WCHAR decoded = decode_pct_val(path_ptr); 06438 if(decoded) { 06439 *ptr = decoded; 06440 path_ptr += 2; 06441 do_default_action = FALSE; 06442 } 06443 } else if(*path_ptr == '/') { 06444 *ptr = '\\'; 06445 do_default_action = FALSE; 06446 } 06447 06448 if(do_default_action) 06449 *ptr = *path_ptr; 06450 } 06451 06452 *ptr = 0; 06453 06454 *result_len = ptr-buffer; 06455 if(*result_len+1 > output_len) 06456 return STRSAFE_E_INSUFFICIENT_BUFFER; 06457 06458 memcpy(output, buffer, (*result_len+1)*sizeof(WCHAR)); 06459 return S_OK; 06460 } 06461 06462 static HRESULT parse_url_from_path(IUri *uri, LPWSTR output, DWORD output_len, 06463 DWORD *result_len) 06464 { 06465 HRESULT hr; 06466 BSTR received; 06467 DWORD len = 0; 06468 06469 hr = IUri_GetPropertyLength(uri, Uri_PROPERTY_ABSOLUTE_URI, &len, 0); 06470 if(FAILED(hr)) { 06471 *result_len = 0; 06472 return hr; 06473 } 06474 06475 *result_len = len; 06476 if(len+1 > output_len) 06477 return STRSAFE_E_INSUFFICIENT_BUFFER; 06478 06479 hr = IUri_GetAbsoluteUri(uri, &received); 06480 if(FAILED(hr)) { 06481 *result_len = 0; 06482 return hr; 06483 } 06484 06485 memcpy(output, received, (len+1)*sizeof(WCHAR)); 06486 SysFreeString(received); 06487 06488 return S_OK; 06489 } 06490 06491 static HRESULT parse_schema(IUri *uri, LPWSTR output, DWORD output_len, 06492 DWORD *result_len) 06493 { 06494 HRESULT hr; 06495 DWORD len; 06496 BSTR received; 06497 06498 hr = IUri_GetPropertyLength(uri, Uri_PROPERTY_SCHEME_NAME, &len, 0); 06499 if(FAILED(hr)) { 06500 *result_len = 0; 06501 
return hr; 06502 } 06503 06504 *result_len = len; 06505 if(len+1 > output_len) 06506 return STRSAFE_E_INSUFFICIENT_BUFFER; 06507 06508 hr = IUri_GetSchemeName(uri, &received); 06509 if(FAILED(hr)) { 06510 *result_len = 0; 06511 return hr; 06512 } 06513 06514 memcpy(output, received, (len+1)*sizeof(WCHAR)); 06515 SysFreeString(received); 06516 06517 return S_OK; 06518 } 06519 06520 static HRESULT parse_site(IUri *uri, LPWSTR output, DWORD output_len, DWORD *result_len) 06521 { 06522 HRESULT hr; 06523 DWORD len; 06524 BSTR received; 06525 06526 hr = IUri_GetPropertyLength(uri, Uri_PROPERTY_HOST, &len, 0); 06527 if(FAILED(hr)) { 06528 *result_len = 0; 06529 return hr; 06530 } 06531 06532 *result_len = len; 06533 if(len+1 > output_len) 06534 return STRSAFE_E_INSUFFICIENT_BUFFER; 06535 06536 hr = IUri_GetHost(uri, &received); 06537 if(FAILED(hr)) { 06538 *result_len = 0; 06539 return hr; 06540 } 06541 06542 memcpy(output, received, (len+1)*sizeof(WCHAR)); 06543 SysFreeString(received); 06544 06545 return S_OK; 06546 } 06547 06548 static HRESULT parse_domain(IUri *uri, LPWSTR output, DWORD output_len, DWORD *result_len) 06549 { 06550 HRESULT hr; 06551 DWORD len; 06552 BSTR received; 06553 06554 hr = IUri_GetPropertyLength(uri, Uri_PROPERTY_DOMAIN, &len, 0); 06555 if(FAILED(hr)) { 06556 *result_len = 0; 06557 return hr; 06558 } 06559 06560 *result_len = len; 06561 if(len+1 > output_len) 06562 return STRSAFE_E_INSUFFICIENT_BUFFER; 06563 06564 hr = IUri_GetDomain(uri, &received); 06565 if(FAILED(hr)) { 06566 *result_len = 0; 06567 return hr; 06568 } 06569 06570 memcpy(output, received, (len+1)*sizeof(WCHAR)); 06571 SysFreeString(received); 06572 06573 return S_OK; 06574 } 06575 06576 static HRESULT parse_anchor(IUri *uri, LPWSTR output, DWORD output_len, DWORD *result_len) 06577 { 06578 HRESULT hr; 06579 DWORD len; 06580 BSTR received; 06581 06582 hr = IUri_GetPropertyLength(uri, Uri_PROPERTY_FRAGMENT, &len, 0); 06583 if(FAILED(hr)) { 06584 *result_len = 0; 06585 return hr; 
06586 } 06587 06588 *result_len = len; 06589 if(len+1 > output_len) 06590 return STRSAFE_E_INSUFFICIENT_BUFFER; 06591 06592 hr = IUri_GetFragment(uri, &received); 06593 if(FAILED(hr)) { 06594 *result_len = 0; 06595 return hr; 06596 } 06597 06598 memcpy(output, received, (len+1)*sizeof(WCHAR)); 06599 SysFreeString(received); 06600 06601 return S_OK; 06602 } 06603 06604 /*********************************************************************** 06605 * CoInternetParseIUri (urlmon.@) 06606 */ 06607 HRESULT WINAPI CoInternetParseIUri(IUri *pIUri, PARSEACTION ParseAction, DWORD dwFlags, 06608 LPWSTR pwzResult, DWORD cchResult, DWORD *pcchResult, 06609 DWORD_PTR dwReserved) 06610 { 06611 HRESULT hr; 06612 Uri *uri; 06613 IInternetProtocolInfo *info; 06614 06615 TRACE("(%p %d %x %p %d %p %x)\n", pIUri, ParseAction, dwFlags, pwzResult, 06616 cchResult, pcchResult, (DWORD)dwReserved); 06617 06618 if(!pcchResult) 06619 return E_POINTER; 06620 06621 if(!pwzResult || !pIUri) { 06622 *pcchResult = 0; 06623 return E_INVALIDARG; 06624 } 06625 06626 if(!(uri = get_uri_obj(pIUri))) { 06627 *pcchResult = 0; 06628 FIXME("(%p %d %x %p %d %p %x) Unknown IUri's not supported for this action.\n", 06629 pIUri, ParseAction, dwFlags, pwzResult, cchResult, pcchResult, (DWORD)dwReserved); 06630 return E_NOTIMPL; 06631 } 06632 06633 info = get_protocol_info(uri->canon_uri); 06634 if(info) { 06635 hr = IInternetProtocolInfo_ParseUrl(info, uri->canon_uri, ParseAction, dwFlags, 06636 pwzResult, cchResult, pcchResult, 0); 06637 IInternetProtocolInfo_Release(info); 06638 if(SUCCEEDED(hr)) return hr; 06639 } 06640 06641 switch(ParseAction) { 06642 case PARSE_CANONICALIZE: 06643 hr = parse_canonicalize(uri, dwFlags, pwzResult, cchResult, pcchResult); 06644 break; 06645 case PARSE_FRIENDLY: 06646 hr = parse_friendly(pIUri, pwzResult, cchResult, pcchResult); 06647 break; 06648 case PARSE_ROOTDOCUMENT: 06649 hr = parse_rootdocument(uri, pwzResult, cchResult, pcchResult); 06650 break; 06651 case 
PARSE_DOCUMENT: 06652 hr = parse_document(uri, pwzResult, cchResult, pcchResult); 06653 break; 06654 case PARSE_PATH_FROM_URL: 06655 hr = parse_path_from_url(uri, pwzResult, cchResult, pcchResult); 06656 break; 06657 case PARSE_URL_FROM_PATH: 06658 hr = parse_url_from_path(pIUri, pwzResult, cchResult, pcchResult); 06659 break; 06660 case PARSE_SCHEMA: 06661 hr = parse_schema(pIUri, pwzResult, cchResult, pcchResult); 06662 break; 06663 case PARSE_SITE: 06664 hr = parse_site(pIUri, pwzResult, cchResult, pcchResult); 06665 break; 06666 case PARSE_DOMAIN: 06667 hr = parse_domain(pIUri, pwzResult, cchResult, pcchResult); 06668 break; 06669 case PARSE_LOCATION: 06670 case PARSE_ANCHOR: 06671 hr = parse_anchor(pIUri, pwzResult, cchResult, pcchResult); 06672 break; 06673 case PARSE_SECURITY_URL: 06674 case PARSE_MIME: 06675 case PARSE_SERVER: 06676 case PARSE_SECURITY_DOMAIN: 06677 *pcchResult = 0; 06678 hr = E_FAIL; 06679 break; 06680 default: 06681 *pcchResult = 0; 06682 hr = E_NOTIMPL; 06683 FIXME("(%p %d %x %p %d %p %x) Partial stub.\n", pIUri, ParseAction, dwFlags, 06684 pwzResult, cchResult, pcchResult, (DWORD)dwReserved); 06685 } 06686 06687 return hr; 06688 } Generated on Sun May 27 2012 04:26:42 for ReactOS by
1.7.6.1
|