xref: /core/svl/source/misc/urihelper.cxx (revision 52b8697a)
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  * This file incorporates work covered by the following license notice:
10  *
11  *   Licensed to the Apache Software Foundation (ASF) under one or more
12  *   contributor license agreements. See the NOTICE file distributed
13  *   with this work for additional information regarding copyright
14  *   ownership. The ASF licenses this file to you under the Apache
15  *   License, Version 2.0 (the "License"); you may not use this file
16  *   except in compliance with the License. You may obtain a copy of
17  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
18  */
19 
20 #include <memory>
21 #include <string_view>
22 
23 #include <sal/config.h>
24 
25 #include <unicode/idna.h>
26 
27 #include <svl/urihelper.hxx>
28 #include <com/sun/star/ucb/Command.hpp>
29 #include <com/sun/star/ucb/IllegalIdentifierException.hpp>
30 #include <com/sun/star/ucb/UniversalContentBroker.hpp>
31 #include <com/sun/star/ucb/UnsupportedCommandException.hpp>
32 #include <com/sun/star/ucb/XCommandEnvironment.hpp>
33 #include <com/sun/star/ucb/XCommandProcessor.hpp>
34 #include <com/sun/star/ucb/XContent.hpp>
35 #include <com/sun/star/ucb/XUniversalContentBroker.hpp>
36 #include <com/sun/star/uno/Any.hxx>
37 #include <com/sun/star/uno/Exception.hpp>
38 #include <com/sun/star/uno/Reference.hxx>
39 #include <com/sun/star/uno/RuntimeException.hpp>
40 #include <com/sun/star/uno/XComponentContext.hpp>
41 #include <com/sun/star/uri/UriReferenceFactory.hpp>
42 #include <com/sun/star/uri/XUriReference.hpp>
43 #include <com/sun/star/uri/XUriReferenceFactory.hpp>
44 #include <comphelper/processfactory.hxx>
45 #include <osl/diagnose.h>
46 #include <rtl/character.hxx>
47 #include <rtl/instance.hxx>
48 #include <rtl/ustrbuf.hxx>
49 #include <rtl/ustring.hxx>
50 #include <sal/types.h>
51 #include <sal/log.hxx>
52 #include <tools/inetmime.hxx>
53 #include <unotools/charclass.hxx>
54 
55 using namespace com::sun::star;
56 
57 OUString URIHelper::SmartRel2Abs(INetURLObject const & rTheBaseURIRef,
58                                  OUString const & rTheRelURIRef,
59                                  Link<OUString *, bool> const & rMaybeFileHdl,
60                                  bool bCheckFileExists,
61                                  bool bIgnoreFragment,
62                                  INetURLObject::EncodeMechanism eEncodeMechanism,
63                                  INetURLObject::DecodeMechanism eDecodeMechanism,
64                                  rtl_TextEncoding eCharset,
65                                  FSysStyle eStyle)
66 {
67     // Backwards compatibility:
68     if( rTheRelURIRef.startsWith("#") )
69         return rTheRelURIRef;
70 
71     INetURLObject aAbsURIRef;
72     if (rTheBaseURIRef.HasError())
73         aAbsURIRef. SetSmartURL(rTheRelURIRef, eEncodeMechanism, eCharset, eStyle);
74     else
75     {
76         bool bWasAbsolute;
77         aAbsURIRef = rTheBaseURIRef.smartRel2Abs(rTheRelURIRef,
78                                                  bWasAbsolute,
79                                                  bIgnoreFragment,
80                                                  eEncodeMechanism,
81                                                  eCharset,
82                                                  false/*bRelativeNonURIs*/,
83                                                  eStyle);
84         if (bCheckFileExists
85             && !bWasAbsolute
86             && (aAbsURIRef.GetProtocol() == INetProtocol::File))
87         {
88             INetURLObject aNonFileURIRef;
89             aNonFileURIRef.SetSmartURL(rTheRelURIRef,
90                                        eEncodeMechanism,
91                                        eCharset,
92                                        eStyle);
93             if (!aNonFileURIRef.HasError()
94                 && aNonFileURIRef.GetProtocol() != INetProtocol::File)
95             {
96                 bool bMaybeFile = false;
97                 if (rMaybeFileHdl.IsSet())
98                 {
99                     OUString aFilePath(rTheRelURIRef);
100                     bMaybeFile = rMaybeFileHdl.Call(&aFilePath);
101                 }
102                 if (!bMaybeFile)
103                     aAbsURIRef = aNonFileURIRef;
104             }
105         }
106     }
107     return aAbsURIRef.GetMainURL(eDecodeMechanism, eCharset);
108 }
109 
110 namespace { Link<OUString *, bool> gMaybeFileHdl; }
111 
112 void URIHelper::SetMaybeFileHdl(Link<OUString *, bool> const & rTheMaybeFileHdl)
113 {
114     gMaybeFileHdl = rTheMaybeFileHdl;
115 }
116 
117 Link<OUString *, bool> const & URIHelper::GetMaybeFileHdl()
118 {
119     return gMaybeFileHdl;
120 }
121 
122 namespace {
123 
124 bool isAbsoluteHierarchicalUriReference(
125     css::uno::Reference< css::uri::XUriReference > const & uriReference)
126 {
127     return uriReference.is() && uriReference->isAbsolute()
128         && !uriReference->hasRelativePath();
129 }
130 
// Outcome of normalizePrefix.  To improve performance, assume that if for any
// prefix URL of a given hierarchical URL either a UCB content cannot be
// created, or the UCB content does not support the getCasePreservingURL
// command, then this will hold for any other prefix URL of the given URL, too
// (that situation is reported as GeneralFailure rather than SpecificFailure):
enum Result {
    Success,        // the getCasePreservingURL command was executed
    GeneralFailure, // no content could be created, or the command is unsupported
    SpecificFailure // the command failed with some other css::uno::Exception
};
136 
137 Result normalizePrefix( css::uno::Reference< css::ucb::XUniversalContentBroker > const & broker,
138                         OUString const & uri, OUString * normalized)
139 {
140     OSL_ASSERT(broker.is() && normalized != nullptr);
141     css::uno::Reference< css::ucb::XContent > content;
142     try {
143         content = broker->queryContent(broker->createContentIdentifier(uri));
144     } catch (css::ucb::IllegalIdentifierException &) {}
145     if (!content.is()) {
146         return GeneralFailure;
147     }
148     try {
149         bool ok =
150             (css::uno::Reference< css::ucb::XCommandProcessor >(
151                    content, css::uno::UNO_QUERY_THROW)->execute(
152                        css::ucb::Command("getCasePreservingURL",
153                            -1, css::uno::Any()),
154                        0,
155                        css::uno::Reference< css::ucb::XCommandEnvironment >())
156                >>= *normalized);
157         OSL_ASSERT(ok);
158     } catch (css::uno::RuntimeException &) {
159         throw;
160     } catch (css::ucb::UnsupportedCommandException &) {
161         return GeneralFailure;
162     } catch (css::uno::Exception &) {
163         return SpecificFailure;
164     }
165     return Success;
166 }
167 
168 OUString normalize(
169     css::uno::Reference< css::ucb::XUniversalContentBroker > const & broker,
170     css::uno::Reference< css::uri::XUriReferenceFactory > const & uriFactory,
171     OUString const & uriReference)
172 {
173     // normalizePrefix can potentially fail (a typically example being a file
174     // URL that denotes a non-existing resource); in such a case, try to
175     // normalize as long a prefix of the given URL as possible (i.e., normalize
176     // all the existing directories within the path):
177     OUString normalized;
178     sal_Int32 n = uriReference.indexOf('#');
179     normalized = n == -1 ? uriReference : uriReference.copy(0, n);
180     switch (normalizePrefix(broker, normalized, &normalized)) {
181     case Success:
182         return n == -1 ? normalized : normalized + uriReference.subView(n);
183     case GeneralFailure:
184         return uriReference;
185     case SpecificFailure:
186     default:
187         break;
188     }
189     css::uno::Reference< css::uri::XUriReference > ref(
190         uriFactory->parse(uriReference));
191     if (!isAbsoluteHierarchicalUriReference(ref)) {
192         return uriReference;
193     }
194     sal_Int32 count = ref->getPathSegmentCount();
195     if (count < 2) {
196         return uriReference;
197     }
198     OUStringBuffer head(ref->getScheme());
199     head.append(':');
200     if (ref->hasAuthority()) {
201         head.append("//");
202         head.append(ref->getAuthority());
203     }
204     for (sal_Int32 i = count - 1; i > 0; --i) {
205         OUStringBuffer buf(head);
206         for (sal_Int32 j = 0; j < i; ++j) {
207             buf.append('/');
208             buf.append(ref->getPathSegment(j));
209         }
210         normalized = buf.makeStringAndClear();
211         if (normalizePrefix(broker, normalized, &normalized) != SpecificFailure)
212         {
213             buf.append(normalized);
214             css::uno::Reference< css::uri::XUriReference > preRef(
215                 uriFactory->parse(normalized));
216             if (!isAbsoluteHierarchicalUriReference(preRef)) {
217                 // This could only happen if something is inconsistent:
218                 break;
219             }
220             sal_Int32 preCount = preRef->getPathSegmentCount();
221             // normalizePrefix may have added or removed a final slash:
222             if (preCount != i) {
223                 if (preCount == i - 1) {
224                     buf.append('/');
225                 } else if (preCount - 1 == i && !buf.isEmpty()
226                            && buf[buf.getLength() - 1] == '/')
227                 {
228                     buf.setLength(buf.getLength() - 1);
229                 } else {
230                     // This could only happen if something is inconsistent:
231                     break;
232                 }
233             }
234             for (sal_Int32 j = i; j < count; ++j) {
235                 buf.append('/');
236                 buf.append(ref->getPathSegment(j));
237             }
238             if (ref->hasQuery()) {
239                 buf.append('?');
240                 buf.append(ref->getQuery());
241             }
242             if (ref->hasFragment()) {
243                 buf.append('#');
244                 buf.append(ref->getFragment());
245             }
246             return buf.makeStringAndClear();
247         }
248     }
249     return uriReference;
250 }
251 
252 }
253 
254 css::uno::Reference< css::uri::XUriReference >
255 URIHelper::normalizedMakeRelative(
256     css::uno::Reference< css::uno::XComponentContext > const & context,
257     OUString const & baseUriReference, OUString const & uriReference)
258 {
259     OSL_ASSERT(context.is());
260     css::uno::Reference< css::ucb::XUniversalContentBroker > broker(
261         css::ucb::UniversalContentBroker::create(context));
262     css::uno::Reference< css::uri::XUriReferenceFactory > uriFactory(
263         css::uri::UriReferenceFactory::create(context));
264     return uriFactory->makeRelative(
265         uriFactory->parse(normalize(broker, uriFactory, baseUriReference)),
266         uriFactory->parse(normalize(broker, uriFactory, uriReference)), true,
267         true, false);
268 }
269 
270 OUString URIHelper::simpleNormalizedMakeRelative(
271     OUString const & baseUriReference, OUString const & uriReference)
272 {
273     css::uno::Reference< css::uri::XUriReference > rel(
274         URIHelper::normalizedMakeRelative(
275             comphelper::getProcessComponentContext(), baseUriReference,
276             uriReference));
277     return rel.is() ? rel->getUriReference() : uriReference;
278 }
279 
280 
281 //  FindFirstURLInText
282 
283 
284 namespace {
285 
286 sal_Int32 nextChar(OUString const & rStr, sal_Int32 nPos)
287 {
288     return rtl::isHighSurrogate(rStr[nPos])
289            && rStr.getLength() - nPos >= 2
290            && rtl::isLowSurrogate(rStr[nPos + 1]) ?
291         nPos + 2 : nPos + 1;
292 }
293 
294 bool isBoundary1(CharClass const & rCharClass, OUString const & rStr,
295                  sal_Int32 nPos, sal_Int32 nEnd)
296 {
297     if (nPos == nEnd)
298         return true;
299     if (rCharClass.isLetterNumeric(rStr, nPos))
300         return false;
301     switch (rStr[nPos])
302     {
303     case '$':
304     case '%':
305     case '&':
306     case '-':
307     case '/':
308     case '@':
309     case '\\':
310         return false;
311     default:
312         return true;
313     }
314 }
315 
316 bool isBoundary2(CharClass const & rCharClass, OUString const & rStr,
317                  sal_Int32 nPos, sal_Int32 nEnd)
318 {
319     if (nPos == nEnd)
320         return true;
321     if (rCharClass.isLetterNumeric(rStr, nPos))
322         return false;
323     switch (rStr[nPos])
324     {
325     case '!':
326     case '#':
327     case '$':
328     case '%':
329     case '&':
330     case '\'':
331     case '*':
332     case '+':
333     case '-':
334     case '/':
335     case '=':
336     case '?':
337     case '@':
338     case '^':
339     case '_':
340     case '`':
341     case '{':
342     case '|':
343     case '}':
344     case '~':
345         return false;
346     default:
347         return true;
348     }
349 }
350 
351 bool checkWChar(CharClass const & rCharClass, OUString const & rStr,
352                 sal_Int32 * pPos, sal_Int32 * pEnd, bool bBackslash = false,
353                 bool bPipe = false)
354 {
355     sal_Unicode c = rStr[*pPos];
356     if (rtl::isAscii(c))
357     {
358         static sal_uInt8 const aMap[128]
359             = { 0, 0, 0, 0, 0, 0, 0, 0,
360                 0, 0, 0, 0, 0, 0, 0, 0,
361                 0, 0, 0, 0, 0, 0, 0, 0,
362                 0, 0, 0, 0, 0, 0, 0, 0,
363                 0, 1, 0, 0, 4, 4, 4, 1,   //  !"#$%&'
364                 1, 1, 1, 1, 1, 4, 1, 4,   // ()*+,-./
365                 4, 4, 4, 4, 4, 4, 4, 4,   // 01234567
366                 4, 4, 1, 1, 0, 1, 0, 1,   // 89:;<=>?
367                 4, 4, 4, 4, 4, 4, 4, 4,   // @ABCDEFG
368                 4, 4, 4, 4, 4, 4, 4, 4,   // HIJKLMNO
369                 4, 4, 4, 4, 4, 4, 4, 4,   // PQRSTUVW
370                 4, 4, 4, 1, 2, 1, 0, 1,   // XYZ[\]^_
371                 0, 4, 4, 4, 4, 4, 4, 4,   // `abcdefg
372                 4, 4, 4, 4, 4, 4, 4, 4,   // hijklmno
373                 4, 4, 4, 4, 4, 4, 4, 4,   // pqrstuvw
374                 4, 4, 4, 0, 3, 0, 1, 0 }; // xyz{|}~
375         switch (aMap[c])
376         {
377             default: // not uric
378                 return false;
379 
380             case 1: // uric
381                 ++(*pPos);
382                 return true;
383 
384             case 2: // "\"
385                 if (bBackslash)
386                 {
387                     *pEnd = ++(*pPos);
388                     return true;
389                 }
390                 else
391                     return false;
392 
393             case 3: // "|"
394                 if (bPipe)
395                 {
396                     *pEnd = ++(*pPos);
397                     return true;
398                 }
399                 else
400                     return false;
401 
402             case 4: // alpha, digit, "$", "%", "&", "-", "/", "@" (see
403                     // isBoundary1)
404                 *pEnd = ++(*pPos);
405                 return true;
406         }
407     }
408     else if (rCharClass.isLetterNumeric(rStr, *pPos))
409     {
410         *pEnd = *pPos = nextChar(rStr, *pPos);
411         return true;
412     }
413     else
414         return false;
415 }
416 
417 sal_uInt32 scanDomain(OUString const & rStr, sal_Int32 * pPos,
418                       sal_Int32 nEnd)
419 {
420     sal_Unicode const * pBuffer = rStr.getStr();
421     sal_Unicode const * p = pBuffer + *pPos;
422     sal_uInt32 nLabels = INetURLObject::scanDomain(p, pBuffer + nEnd, false);
423     *pPos = sal::static_int_cast< sal_Int32 >(p - pBuffer);
424     return nLabels;
425 }
426 
427 }
428 
429 OUString URIHelper::FindFirstURLInText(OUString const & rText,
430                                        sal_Int32 & rBegin,
431                                        sal_Int32 & rEnd,
432                                        CharClass const & rCharClass,
433                                        INetURLObject::EncodeMechanism eMechanism,
434                                        rtl_TextEncoding eCharset)
435 {
436     if (rBegin > rEnd || rEnd > rText.getLength())
437         return OUString();
438 
439     // Search for the first substring of [rBegin..rEnd[ that matches any of the
440     // following productions (for which the appropriate style bit is set in
441     // eStyle, if applicable).
442 
443     // 1st Production (known scheme):
444     //    \B1 <one of the known schemes, except file> ":" 1*wchar ["#" 1*wchar]
445     //        \B1
446 
447     // 2nd Production (file):
448     //    \B1 "FILE:" 1*(wchar / "\" / "|") ["#" 1*wchar] \B1
449 
450     // 3rd Production (ftp):
451     //    \B1 "FTP" 2*("." label) ["/" *wchar] ["#" 1*wchar] \B1
452 
453     // 4th Production (http):
454     //    \B1 "WWW" 2*("." label) ["/" *wchar] ["#" 1*wchar] \B1
455 
456     // 5th Production (mailto):
457     //    \B2 local-part "@" domain \B1
458 
459     // 6th Production (UNC file):
460     //    \B1 "\\" domain "\" *(wchar / "\") \B1
461 
462     // 7th Production (DOS file):
463     //    \B1 ALPHA ":\" *(wchar / "\") \B1
464 
465     // 8th Production (Unix-like DOS file):
466     //    \B1 ALPHA ":/" *(wchar / "\") \B1
467 
468     // The productions use the following auxiliary rules.
469 
470     //    local-part = atom *("." atom)
471     //    atom = 1*(alphanum / "!" / "#" / "$" / "%" / "&" / "'" / "*" / "+"
472     //              / "-" / "/" / "=" / "?" / "^" / "_" / "`" / "{" / "|" / "}"
473     //              / "~")
474     //    domain = label *("." label)
475     //    label = alphanum [*(alphanum / "-") alphanum]
476     //    alphanum = ALPHA / DIGIT
477     //    wchar = <any uric character (ignoring the escaped rule), or "%", or
478     //             a letter or digit (according to rCharClass)>
479 
480     // "\B1" (boundary 1) stands for the beginning or end of the block of text,
481     // or a character that is neither (a) a letter or digit (according to
482     // rCharClass), nor (b) any of "$", "%", "&", "-", "/", "@", or "\".
483     // (FIXME:  What was the rationale for this set of punctuation characters?)
484 
485     // "\B2" (boundary 2) stands for the beginning or end of the block of text,
486     // or a character that is neither (a) a letter or digit (according to
487     // rCharClass), nor (b) any of "!", "#", "$", "%", "&", "'", "*", "+", "-",
488     // "/", "=", "?", "@", "^", "_", "`", "{", "|", "}", or "~" (i.e., an RFC
489     // 822 <atom> character, or "@" from \B1's set above).
490 
491     // Productions 1--4, and 6--8 try to find a maximum-length match, but they
492     // stop at the first <wchar> character that is a "\B1" character which is
493     // only followed by "\B1" characters (taking "\" and "|" characters into
494     // account appropriately).  Production 5 simply tries to find a maximum-
495     // length match.
496 
497     // Productions 1--4 use the given eMechanism and eCharset.  Productions 5--9
498     // use EncodeMechanism::All.
499 
500     // Productions 6--9 are only applicable if the FSysStyle::Dos bit is set in
501     // eStyle.
502 
503     bool bBoundary1 = true;
504     bool bBoundary2 = true;
505     for (sal_Int32 nPos = rBegin; nPos != rEnd; nPos = nextChar(rText, nPos))
506     {
507         sal_Unicode c = rText[nPos];
508         if (bBoundary1)
509         {
510             if (rtl::isAsciiAlpha(c))
511             {
512                 sal_Int32 i = nPos;
513                 INetProtocol eScheme = INetURLObject::CompareProtocolScheme(rText.copy(i, rEnd - i));
514                 if (eScheme == INetProtocol::File) // 2nd
515                 {
516                     while (rText[i++] != ':') ;
517                     sal_Int32 nPrefixEnd = i;
518                     sal_Int32 nUriEnd = i;
519                     while (i != rEnd
520                            && checkWChar(rCharClass, rText, &i, &nUriEnd, true,
521                                          true)) ;
522                     if (i != nPrefixEnd && i != rEnd && rText[i] == '#')
523                     {
524                         ++i;
525                         while (i != rEnd
526                                && checkWChar(rCharClass, rText, &i, &nUriEnd)) ;
527                     }
528                     if (nUriEnd != nPrefixEnd
529                         && isBoundary1(rCharClass, rText, nUriEnd, rEnd))
530                     {
531                         INetURLObject aUri(rText.copy(nPos, nUriEnd - nPos),
532                                            INetProtocol::File, eMechanism, eCharset,
533                                            FSysStyle::Detect);
534                         if (!aUri.HasError())
535                         {
536                             rBegin = nPos;
537                             rEnd = nUriEnd;
538                             return
539                                 aUri.GetMainURL(INetURLObject::DecodeMechanism::ToIUri);
540                         }
541                     }
542                 }
543                 else if (eScheme != INetProtocol::NotValid) // 1st
544                 {
545                     while (rText[i++] != ':') ;
546                     sal_Int32 nPrefixEnd = i;
547                     sal_Int32 nUriEnd = i;
548                     while (i != rEnd
549                            && checkWChar(rCharClass, rText, &i, &nUriEnd)) ;
550                     if (i != nPrefixEnd && i != rEnd && rText[i] == '#')
551                     {
552                         ++i;
553                         while (i != rEnd
554                                && checkWChar(rCharClass, rText, &i, &nUriEnd)) ;
555                     }
556                     if (nUriEnd != nPrefixEnd
557                         && (isBoundary1(rCharClass, rText, nUriEnd, rEnd)
558                             || rText[nUriEnd] == '\\'))
559                     {
560                         INetURLObject aUri(rText.copy(nPos, nUriEnd - nPos),
561                                            INetProtocol::Http, eMechanism,
562                                            eCharset);
563                         if (!aUri.HasError())
564                         {
565                             rBegin = nPos;
566                             rEnd = nUriEnd;
567                             return
568                                 aUri.GetMainURL(INetURLObject::DecodeMechanism::ToIUri);
569                         }
570                     }
571                 }
572 
573                 // 3rd, 4th:
574                 i = nPos;
575                 sal_uInt32 nLabels = scanDomain(rText, &i, rEnd);
576                 if (nLabels >= 3
577                     && rText[nPos + 3] == '.'
578                     && (((rText[nPos] == 'w'
579                           || rText[nPos] == 'W')
580                          && (rText[nPos + 1] == 'w'
581                              || rText[nPos + 1] == 'W')
582                          && (rText[nPos + 2] == 'w'
583                              || rText[nPos + 2] == 'W'))
584                         || ((rText[nPos] == 'f'
585                              || rText[nPos] == 'F')
586                             && (rText[nPos + 1] == 't'
587                                 || rText[nPos + 1] == 'T')
588                             && (rText[nPos + 2] == 'p'
589                                 || rText[nPos + 2] == 'P'))))
590                     // (note that rText.GetChar(nPos + 3) is guaranteed to be
591                     // valid)
592                 {
593                     sal_Int32 nUriEnd = i;
594                     if (i != rEnd && rText[i] == '/')
595                     {
596                         nUriEnd = ++i;
597                         while (i != rEnd
598                                && checkWChar(rCharClass, rText, &i, &nUriEnd)) ;
599                     }
600                     if (i != rEnd && rText[i] == '#')
601                     {
602                         ++i;
603                         while (i != rEnd
604                                && checkWChar(rCharClass, rText, &i, &nUriEnd)) ;
605                     }
606                     if (isBoundary1(rCharClass, rText, nUriEnd, rEnd)
607                         || rText[nUriEnd] == '\\')
608                     {
609                         INetURLObject aUri(rText.copy(nPos, nUriEnd - nPos),
610                                            INetProtocol::Http, eMechanism,
611                                            eCharset);
612                         if (!aUri.HasError())
613                         {
614                             rBegin = nPos;
615                             rEnd = nUriEnd;
616                             return
617                                 aUri.GetMainURL(INetURLObject::DecodeMechanism::ToIUri);
618                         }
619                     }
620                 }
621 
622                 if (rEnd - nPos >= 3
623                     && rText[nPos + 1] == ':'
624                     && (rText[nPos + 2] == '/'
625                         || rText[nPos + 2] == '\\')) // 7th, 8th
626                 {
627                     i = nPos + 3;
628                     sal_Int32 nUriEnd = i;
629                     while (i != rEnd
630                            && checkWChar(rCharClass, rText, &i, &nUriEnd)) ;
631                     if (isBoundary1(rCharClass, rText, nUriEnd, rEnd))
632                     {
633                         INetURLObject aUri(rText.copy(nPos, nUriEnd - nPos),
634                                            INetProtocol::File,
635                                            INetURLObject::EncodeMechanism::All,
636                                            RTL_TEXTENCODING_UTF8,
637                                            FSysStyle::Dos);
638                         if (!aUri.HasError())
639                         {
640                             rBegin = nPos;
641                             rEnd = nUriEnd;
642                             return
643                                 aUri.GetMainURL(INetURLObject::DecodeMechanism::ToIUri);
644                         }
645                     }
646                 }
647             }
648             else if (rEnd - nPos >= 2
649                      && rText[nPos] == '\\'
650                      && rText[nPos + 1] == '\\') // 6th
651             {
652                 sal_Int32 i = nPos + 2;
653                 sal_uInt32 nLabels = scanDomain(rText, &i, rEnd);
654                 if (nLabels >= 1 && i != rEnd && rText[i] == '\\')
655                 {
656                     sal_Int32 nUriEnd = ++i;
657                     while (i != rEnd
658                            && checkWChar(rCharClass, rText, &i, &nUriEnd,
659                                          true)) ;
660                     if (isBoundary1(rCharClass, rText, nUriEnd, rEnd))
661                     {
662                         INetURLObject aUri(rText.copy(nPos, nUriEnd - nPos),
663                                            INetProtocol::File,
664                                            INetURLObject::EncodeMechanism::All,
665                                            RTL_TEXTENCODING_UTF8,
666                                            FSysStyle::Dos);
667                         if (!aUri.HasError())
668                         {
669                             rBegin = nPos;
670                             rEnd = nUriEnd;
671                             return
672                                 aUri.GetMainURL(INetURLObject::DecodeMechanism::ToIUri);
673                         }
674                     }
675                 }
676             }
677         }
678         if (bBoundary2 && INetMIME::isAtomChar(c)) // 5th
679         {
680             bool bDot = false;
681             for (sal_Int32 i = nPos + 1; i != rEnd; ++i)
682             {
683                 sal_Unicode c2 = rText[i];
684                 if (INetMIME::isAtomChar(c2))
685                     bDot = false;
686                 else if (bDot)
687                     break;
688                 else if (c2 == '.')
689                     bDot = true;
690                 else
691                 {
692                     if (c2 == '@')
693                     {
694                         ++i;
695                         sal_uInt32 nLabels = scanDomain(rText, &i, rEnd);
696                         if (nLabels >= 1
697                             && isBoundary1(rCharClass, rText, i, rEnd))
698                         {
699                             INetURLObject aUri(rText.copy(nPos, i - nPos),
700                                                INetProtocol::Mailto,
701                                                INetURLObject::EncodeMechanism::All);
702                             if (!aUri.HasError())
703                             {
704                                 rBegin = nPos;
705                                 rEnd = i;
706                                 return aUri.GetMainURL(
707                                            INetURLObject::DecodeMechanism::ToIUri);
708                             }
709                         }
710                     }
711                     break;
712                 }
713             }
714         }
715         bBoundary1 = isBoundary1(rCharClass, rText, nPos, rEnd);
716         bBoundary2 = isBoundary2(rCharClass, rText, nPos, rEnd);
717     }
718     rBegin = rEnd;
719     return OUString();
720 }
721 
722 OUString URIHelper::removePassword(OUString const & rURI,
723                                    INetURLObject::EncodeMechanism eEncodeMechanism,
724                                    INetURLObject::DecodeMechanism eDecodeMechanism,
725                                    rtl_TextEncoding eCharset)
726 {
727     INetURLObject aObj(rURI, eEncodeMechanism, eCharset);
728     return aObj.HasError() ?
729                rURI :
730                aObj.GetURLNoPass(eDecodeMechanism, eCharset);
731 }
732 
733 OUString URIHelper::resolveIdnaHost(OUString const & url) {
734     css::uno::Reference<css::uri::XUriReference> uri(
735         css::uri::UriReferenceFactory::create(
736             comphelper::getProcessComponentContext())
737         ->parse(url));
738     if (!(uri.is() && uri->hasAuthority())) {
739         return url;
740     }
741     auto auth(uri->getAuthority());
742     if (auth.isEmpty())
743         return url;
744     sal_Int32 hostStart = auth.indexOf('@') + 1;
745     sal_Int32 hostEnd = auth.getLength();
746     while (hostEnd > hostStart && rtl::isAsciiDigit(auth[hostEnd - 1])) {
747         --hostEnd;
748     }
749     if (hostEnd > hostStart && auth[hostEnd - 1] == ':') {
750         --hostEnd;
751     } else {
752         hostEnd = auth.getLength();
753     }
754     auto asciiOnly = true;
755     for (auto i = hostStart; i != hostEnd; ++i) {
756         if (!rtl::isAscii(auth[i])) {
757             asciiOnly = false;
758             break;
759         }
760     }
761     if (asciiOnly) {
762         // Avoid icu::IDNA case normalization in purely non-IDNA domain names:
763         return url;
764     }
765     UErrorCode e = U_ZERO_ERROR;
766     std::unique_ptr<icu::IDNA> idna(
767         icu::IDNA::createUTS46Instance(
768             (UIDNA_USE_STD3_RULES | UIDNA_CHECK_BIDI | UIDNA_CHECK_CONTEXTJ
769 #if U_ICU_VERSION_MAJOR_NUM >= 49
770              | UIDNA_CHECK_CONTEXTO
771 #endif
772              ),
773             e));
774     if (U_FAILURE(e)) {
775         SAL_WARN("vcl.gdi", "icu::IDNA::createUTS46Instance " << e);
776         return url;
777     }
778     icu::UnicodeString ascii;
779     icu::IDNAInfo info;
780     idna->nameToASCII(
781         icu::UnicodeString(
782             reinterpret_cast<UChar const *>(auth.getStr() + hostStart),
783             hostEnd - hostStart),
784         ascii, info, e);
785     if (U_FAILURE(e) || info.hasErrors()) {
786         return url;
787     }
788     OUStringBuffer buf(uri->getScheme());
789     buf.append(OUString::Concat("://") + auth.subView(0, hostStart));
790     buf.append(
791         reinterpret_cast<sal_Unicode const *>(ascii.getBuffer()),
792         ascii.length());
793     buf.append(auth.subView(hostEnd) + uri->getPath());
794     if (uri->hasQuery()) {
795         buf.append("?" + uri->getQuery());
796     }
797     if (uri->hasFragment()) {
798         buf.append("#" + uri->getFragment());
799     }
800     return buf.makeStringAndClear();
801 }
802 
803 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
804