1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ 2 /* 3 * This file is part of the LibreOffice project. 4 * 5 * This Source Code Form is subject to the terms of the Mozilla Public 6 * License, v. 2.0. If a copy of the MPL was not distributed with this 7 * file, You can obtain one at http://mozilla.org/MPL/2.0/. 8 * 9 * This file incorporates work covered by the following license notice: 10 * 11 * Licensed to the Apache Software Foundation (ASF) under one or more 12 * contributor license agreements. See the NOTICE file distributed 13 * with this work for additional information regarding copyright 14 * ownership. The ASF licenses this file to you under the Apache 15 * License, Version 2.0 (the "License"); you may not use this file 16 * except in compliance with the License. You may obtain a copy of 17 * the License at http://www.apache.org/licenses/LICENSE-2.0 . 18 */ 19 20 #include "typedetection.hxx" 21 #include "constant.hxx" 22 23 #include <com/sun/star/document/XExtendedFilterDetection.hpp> 24 #include <com/sun/star/frame/Desktop.hpp> 25 #include <com/sun/star/util/URLTransformer.hpp> 26 #include <com/sun/star/util/XURLTransformer.hpp> 27 28 #include <com/sun/star/io/XInputStream.hpp> 29 #include <com/sun/star/io/XSeekable.hpp> 30 #include <com/sun/star/task/XInteractionHandler.hpp> 31 #include <tools/wldcrd.hxx> 32 #include <rtl/ustrbuf.hxx> 33 #include <sal/log.hxx> 34 #include <framework/interaction.hxx> 35 #include <tools/diagnose_ex.h> 36 #include <tools/urlobj.hxx> 37 #include <comphelper/fileurl.hxx> 38 #include <comphelper/processfactory.hxx> 39 #include <comphelper/sequence.hxx> 40 41 #define DEBUG_TYPE_DETECTION 0 42 43 #if DEBUG_TYPE_DETECTION 44 #include <iostream> 45 using std::cout; 46 using std::endl; 47 #endif 48 49 using namespace com::sun::star; 50 51 namespace filter{ 52 namespace config{ 53 54 TypeDetection::TypeDetection(const css::uno::Reference< css::uno::XComponentContext >& rxContext) 55 : m_xContext(rxContext) 56 , m_xTerminateListener(new TerminateDetection(this)) 57 , m_bCancel(false) 58 { 59 css::frame::Desktop::create(m_xContext)->addTerminateListener(m_xTerminateListener.get()); 60 BaseContainer::init(rxContext , 61 TypeDetection::impl_getImplementationName() , 62 TypeDetection::impl_getSupportedServiceNames(), 63 FilterCache::E_TYPE ); 64 } 65 66 67 TypeDetection::~TypeDetection() 68 { 69 css::frame::Desktop::create(m_xContext)->removeTerminateListener(m_xTerminateListener.get()); 70 } 71 72 73 OUString SAL_CALL TypeDetection::queryTypeByURL(const OUString& sURL) 74 { 75 OUString sType; 76 77 // SAFE -> 78 osl::MutexGuard aLock(m_aLock); 79 80 css::util::URL aURL; 81 aURL.Complete = sURL; 82 css::uno::Reference< css::util::XURLTransformer > xParser( css::util::URLTransformer::create(m_xContext) ); 83 xParser->parseStrict(aURL); 84 85 // set std types as minimum requirement first! 86 // Only in case no type was found for given URL, 87 // use optional types too ... 88 auto & cache = TheFilterCache::get(); 89 FlatDetection lFlatTypes; 90 cache.detectFlatForURL(aURL, lFlatTypes); 91 92 if ( 93 (lFlatTypes.empty() ) && 94 (!cache.isFillState(FilterCache::E_CONTAINS_TYPES)) 95 ) 96 { 97 cache.load(FilterCache::E_CONTAINS_TYPES); 98 cache.detectFlatForURL(aURL, lFlatTypes); 99 } 100 101 // first item is guaranteed as "preferred" one! 102 if (!lFlatTypes.empty()) 103 { 104 const FlatDetectionInfo& aMatch = *(lFlatTypes.begin()); 105 sType = aMatch.sType; 106 } 107 108 return sType; 109 // <- SAFE 110 } 111 112 namespace { 113 114 /** 115 * Rank format types in order of complexity. More complex formats are 116 * ranked higher so that they get tested sooner over simpler formats. 117 * 118 * Guidelines to determine how complex a format is (subject to change): 119 * 120 * 1) compressed text (XML, HTML, etc) 121 * 2) binary 122 * 3) non-compressed text 123 * 3.1) structured text 124 * 3.1.1) dialect of a structured text (e.g. docbook XML) 125 * 3.1.2) generic structured text (e.g. generic XML) 126 * 3.2) non-structured text 127 * 128 * In each category, rank them from strictly-structured to 129 * loosely-structured. 130 */ 131 int getFlatTypeRank(const OUString& rType) 132 { 133 // List formats from more complex to less complex. 134 // TODO: Add more. 135 static const char* ranks[] = { 136 137 // Compressed XML (ODF XML zip formats) 138 "writer8_template", 139 "writer8", 140 "calc8_template", 141 "calc8", 142 "impress8_template", 143 "impress8", 144 "draw8_template", 145 "draw8", 146 "chart8", 147 "math8", 148 "writerglobal8_template", 149 "writerglobal8", 150 "writerweb8_writer_template", 151 "StarBase", 152 153 // Compressed XML (OOXML) 154 "writer_OOXML_Text_Template", 155 "writer_OOXML", 156 "writer_MS_Word_2007_Template", 157 "writer_MS_Word_2007", 158 "Office Open XML Spreadsheet Template", 159 "Office Open XML Spreadsheet", 160 "MS Excel 2007 XML Template", 161 "MS Excel 2007 XML", 162 "MS PowerPoint 2007 XML Template", 163 "MS PowerPoint 2007 XML AutoPlay", 164 "MS PowerPoint 2007 XML", 165 166 // Compressed XML (Uniform/Unified Office Format) 167 "Unified_Office_Format_text", 168 "Unified_Office_Format_spreadsheet", 169 "Unified_Office_Format_presentation", 170 171 // Compressed XML (StarOffice XML zip formats) 172 "calc_StarOffice_XML_Calc", 173 "calc_StarOffice_XML_Calc_Template", 174 "chart_StarOffice_XML_Chart", 175 "draw_StarOffice_XML_Draw", 176 "draw_StarOffice_XML_Draw_Template", 177 "impress_StarOffice_XML_Impress", 178 "impress_StarOffice_XML_Impress_Template", 179 "math_StarOffice_XML_Math", 180 "writer_StarOffice_XML_Writer", 181 "writer_StarOffice_XML_Writer_Template", 182 "writer_globaldocument_StarOffice_XML_Writer_GlobalDocument", 183 "writer_web_StarOffice_XML_Writer_Web_Template", 184 185 // Compressed text 186 "pdf_Portable_Document_Format", 187 188 // Binary 189 "writer_T602_Document", 190 "writer_WordPerfect_Document", 191 "writer_MS_Works_Document", 192 "writer_MS_Word_97_Vorlage", 193 "writer_MS_Word_97", 194 "writer_MS_Word_95_Vorlage", 195 "writer_MS_Word_95", 196 "writer_MS_WinWord_60", 197 "writer_MS_WinWord_5", 198 "MS Excel 2007 Binary", 199 "calc_MS_Excel_97_VorlageTemplate", 200 "calc_MS_Excel_97", 201 "calc_MS_Excel_95_VorlageTemplate", 202 "calc_MS_Excel_95", 203 "calc_MS_Excel_5095_VorlageTemplate", 204 "calc_MS_Excel_5095", 205 "calc_MS_Excel_40_VorlageTemplate", 206 "calc_MS_Excel_40", 207 "calc_Pocket_Excel_File", 208 "impress_MS_PowerPoint_97_Vorlage", 209 "impress_MS_PowerPoint_97_AutoPlay", 210 "impress_MS_PowerPoint_97", 211 "calc_Lotus", 212 "calc_QPro", 213 "calc_SYLK", 214 "calc_DIF", 215 "calc_dBase", 216 217 // Binary (raster and vector image files) 218 "emf_MS_Windows_Metafile", 219 "wmf_MS_Windows_Metafile", 220 "met_OS2_Metafile", 221 "svm_StarView_Metafile", 222 "sgv_StarDraw_20", 223 "tif_Tag_Image_File", 224 "tga_Truevision_TARGA", 225 "sgf_StarOffice_Writer_SGF", 226 "ras_Sun_Rasterfile", 227 "psd_Adobe_Photoshop", 228 "png_Portable_Network_Graphic", 229 "jpg_JPEG", 230 "mov_MOV", 231 "gif_Graphics_Interchange", 232 "bmp_MS_Windows", 233 "pcx_Zsoft_Paintbrush", 234 "pct_Mac_Pict", 235 "pcd_Photo_CD_Base", 236 "pcd_Photo_CD_Base4", 237 "pcd_Photo_CD_Base16", 238 "impress_CGM_Computer_Graphics_Metafile", // There is binary and ascii variants ? 239 "draw_WordPerfect_Graphics", 240 "draw_Visio_Document", 241 "draw_Publisher_Document", 242 "draw_Corel_Presentation_Exchange", 243 "draw_CorelDraw_Document", 244 "writer_LotusWordPro_Document", 245 "writer_MIZI_Hwp_97", // Hanword (Hancom Office) 246 247 // Non-compressed XML 248 "writer_ODT_FlatXML", 249 "calc_ODS_FlatXML", 250 "impress_ODP_FlatXML", 251 "draw_ODG_FlatXML", 252 "calc_ADO_rowset_XML", 253 "calc_MS_Excel_2003_XML", 254 "writer_MS_Word_2003_XML", 255 "writer_DocBook_File", 256 "XHTML_File", 257 "svg_Scalable_Vector_Graphics", 258 "math_MathML_XML_Math", 259 260 // Non-compressed text 261 "dxf_AutoCAD_Interchange", 262 "eps_Encapsulated_PostScript", 263 "pbm_Portable_Bitmap", // There is 'raw' and 'ascii' variants. 264 "ppm_Portable_Pixelmap", // There is 'raw' and 'ascii' variants. 265 "pgm_Portable_Graymap", // There is 'raw' and 'ascii' variants. 266 "xpm_XPM", 267 "xbm_X_Consortium", 268 "writer_Rich_Text_Format", 269 "writer_web_HTML_help", 270 "generic_HTML", 271 272 "generic_Text", // Plain text (catch all) 273 274 // Anything ranked lower than generic_Text will never be used during 275 // type detection (since generic_Text catches all). 276 277 // Export only 278 "writer_layout_dump_xml", 279 "graphic_SWF", 280 "graphic_HTML", 281 282 // Internal use only 283 "StarBaseReportChart", 284 "StarBaseReport", 285 "math_MathType_3x", // MathType equation embedded in Word doc. 286 }; 287 288 size_t n = SAL_N_ELEMENTS(ranks); 289 290 for (size_t i = 0; i < n; ++i) 291 { 292 if (rType.equalsAscii(ranks[i])) 293 return n - i - 1; 294 } 295 296 // Not ranked. Treat them equally. Unranked formats have higher priority 297 // than the ranked internal ones since they may be defined externally. 298 return n; 299 } 300 301 /** 302 * Types with matching pattern first, then extension, then custom ranks by 303 * types, then types that are supported by the document service come next. 304 * Lastly, sort them alphabetically. 305 */ 306 struct SortByPriority 307 { 308 bool operator() (const FlatDetectionInfo& r1, const FlatDetectionInfo& r2) const 309 { 310 if (r1.bMatchByPattern != r2.bMatchByPattern) 311 return r1.bMatchByPattern; 312 313 if (r1.bMatchByExtension != r2.bMatchByExtension) 314 return r1.bMatchByExtension; 315 316 int rank1 = getFlatTypeRank(r1.sType); 317 int rank2 = getFlatTypeRank(r2.sType); 318 319 if (rank1 != rank2) 320 return rank1 > rank2; 321 322 if (r1.bPreselectedByDocumentService != r2.bPreselectedByDocumentService) 323 return r1.bPreselectedByDocumentService; 324 325 // All things being equal, sort them alphabetically. 326 return r1.sType > r2.sType; 327 } 328 }; 329 330 struct SortByType 331 { 332 bool operator() (const FlatDetectionInfo& r1, const FlatDetectionInfo& r2) const 333 { 334 return r1.sType > r2.sType; 335 } 336 }; 337 338 struct EqualByType 339 { 340 bool operator() (const FlatDetectionInfo& r1, const FlatDetectionInfo& r2) const 341 { 342 return r1.sType == r2.sType; 343 } 344 }; 345 346 class FindByType 347 { 348 OUString maType; 349 public: 350 explicit FindByType(const OUString& rType) : maType(rType) {} 351 bool operator() (const FlatDetectionInfo& rInfo) const 352 { 353 return rInfo.sType == maType; 354 } 355 }; 356 357 #if DEBUG_TYPE_DETECTION 358 void printFlatDetectionList(const char* caption, const FlatDetection& types) 359 { 360 cout << "-- " << caption << " (size=" << types.size() << ")" << endl; 361 for (auto const& item : types) 362 { 363 cout << " type='" << item.sType << "'; match by extension (" << item.bMatchByExtension 364 << "); match by pattern (" << item.bMatchByPattern << "); pre-selected by doc service (" 365 << item.bPreselectedByDocumentService << ")" << endl; 366 } 367 cout << "--" << endl; 368 } 369 #endif 370 371 } 372 373 OUString SAL_CALL TypeDetection::queryTypeByDescriptor(css::uno::Sequence< css::beans::PropertyValue >& lDescriptor, 374 sal_Bool bAllowDeep ) 375 { 376 // make the descriptor more usable :-) 377 utl::MediaDescriptor stlDescriptor(lDescriptor); 378 OUString sType, sURL; 379 380 try 381 { 382 // SAFE -> ---------------------------------- 383 osl::ClearableMutexGuard aLock(m_aLock); 384 385 // parse given URL to split it into e.g. main and jump marks ... 386 sURL = stlDescriptor.getUnpackedValueOrDefault(utl::MediaDescriptor::PROP_URL(), OUString()); 387 388 #if OSL_DEBUG_LEVEL > 0 389 if (stlDescriptor.find( "FileName" ) != stlDescriptor.end()) 390 OSL_FAIL("Detect using of deprecated and already unsupported MediaDescriptor property \"FileName\"!"); 391 #endif 392 393 css::util::URL aURL; 394 aURL.Complete = sURL; 395 css::uno::Reference< css::util::XURLTransformer > xParser(css::util::URLTransformer::create(m_xContext)); 396 xParser->parseStrict(aURL); 397 398 OUString aSelectedFilter = stlDescriptor.getUnpackedValueOrDefault( 399 utl::MediaDescriptor::PROP_FILTERNAME(), OUString()); 400 if (!aSelectedFilter.isEmpty()) 401 { 402 // Caller specified the filter type. Honor it. Just get the default 403 // type for that filter, and bail out. 404 if (impl_validateAndSetFilterOnDescriptor(stlDescriptor, aSelectedFilter)) 405 return stlDescriptor[utl::MediaDescriptor::PROP_TYPENAME()].get<OUString>(); 406 } 407 408 FlatDetection lFlatTypes; 409 impl_getAllFormatTypes(aURL, stlDescriptor, lFlatTypes); 410 411 aLock.clear(); 412 // <- SAFE ---------------------------------- 413 414 // Properly prioritize all candidate types. 415 std::stable_sort(lFlatTypes.begin(), lFlatTypes.end(), SortByPriority()); 416 auto last = std::unique(lFlatTypes.begin(), lFlatTypes.end(), EqualByType()); 417 lFlatTypes.erase(last, lFlatTypes.end()); 418 419 OUString sLastChance; 420 421 // verify every flat detected (or preselected!) type 422 // by calling its registered deep detection service. 423 // But break this loop if a type match to the given descriptor 424 // by a URL pattern(!) or if deep detection isn't allowed from 425 // outside (bAllowDeep=sal_False) or break the whole detection by 426 // throwing an exception if creation of the might needed input 427 // stream failed by e.g. an IO exception ... 428 std::vector<OUString> lUsedDetectors; 429 if (!lFlatTypes.empty()) 430 sType = impl_detectTypeFlatAndDeep(stlDescriptor, lFlatTypes, bAllowDeep, lUsedDetectors, sLastChance); 431 432 // flat detection failed 433 // pure deep detection failed 434 // => ask might existing InteractionHandler 435 // means: ask user for its decision 436 if (sType.isEmpty() && !m_bCancel) 437 sType = impl_askUserForTypeAndFilterIfAllowed(stlDescriptor); 438 439 440 // no real detected type - but a might valid one. 441 // update descriptor and set last chance for return. 442 if (sType.isEmpty() && !sLastChance.isEmpty() && !m_bCancel) 443 { 444 OSL_FAIL("set first flat detected type without a registered deep detection service as \"last chance\" ... nevertheless some other deep detections said \"NO\". I TRY IT!"); 445 sType = sLastChance; 446 } 447 } 448 catch(const css::uno::RuntimeException&) 449 { 450 throw; 451 } 452 catch(const css::uno::Exception&) 453 { 454 TOOLS_WARN_EXCEPTION("filter.config", "caught exception while querying type of " << sURL); 455 sType.clear(); 456 } 457 458 // adapt media descriptor, so it contains the right values 459 // for type/filter name/document service/ etcpp. 460 impl_checkResultsAndAddBestFilter(stlDescriptor, sType); // Attention: sType is used as IN/OUT param here and will might be changed inside this method !!! 461 impl_validateAndSetTypeOnDescriptor(stlDescriptor, sType); 462 463 stlDescriptor >> lDescriptor; 464 return sType; 465 } 466 467 468 void TypeDetection::impl_checkResultsAndAddBestFilter(utl::MediaDescriptor& rDescriptor, 469 OUString& sType ) 470 { 471 // a) 472 // Don't overwrite a might preselected filter! 473 OUString sFilter = rDescriptor.getUnpackedValueOrDefault( 474 utl::MediaDescriptor::PROP_FILTERNAME(), 475 OUString()); 476 if (!sFilter.isEmpty()) 477 return; 478 479 auto & cache = TheFilterCache::get(); 480 481 // b) 482 // check a preselected document service too. 483 // Then we have to search a suitable filter within this module. 484 OUString sDocumentService = rDescriptor.getUnpackedValueOrDefault( 485 utl::MediaDescriptor::PROP_DOCUMENTSERVICE(), 486 OUString()); 487 if (!sDocumentService.isEmpty()) 488 { 489 try 490 { 491 OUString sRealType = sType; 492 493 // SAFE -> 494 ::osl::ResettableMutexGuard aLock(m_aLock); 495 496 // Attention: For executing next lines of code, We must be sure that 497 // all filters already loaded :-( 498 // That can disturb our "load on demand feature". But we have no other chance! 499 cache.load(FilterCache::E_CONTAINS_FILTERS); 500 501 CacheItem lIProps; 502 lIProps[PROPNAME_DOCUMENTSERVICE] <<= sDocumentService; 503 lIProps[PROPNAME_TYPE ] <<= sRealType; 504 std::vector<OUString> lFilters = cache.getMatchingItemsByProps(FilterCache::E_FILTER, lIProps); 505 506 aLock.clear(); 507 // <- SAFE 508 509 for (auto const& filter : lFilters) 510 { 511 // SAFE -> 512 aLock.reset(); 513 try 514 { 515 CacheItem aFilter = cache.getItem(FilterCache::E_FILTER, filter); 516 sal_Int32 nFlags = 0; 517 aFilter[PROPNAME_FLAGS] >>= nFlags; 518 519 if (static_cast<SfxFilterFlags>(nFlags) & SfxFilterFlags::IMPORT) 520 sFilter = filter; 521 if (static_cast<SfxFilterFlags>(nFlags) & SfxFilterFlags::PREFERED) 522 break; 523 } 524 catch(const css::uno::Exception&) {} 525 aLock.clear(); 526 // <- SAFE 527 } 528 529 if (!sFilter.isEmpty()) 530 { 531 rDescriptor[utl::MediaDescriptor::PROP_TYPENAME() ] <<= sRealType; 532 rDescriptor[utl::MediaDescriptor::PROP_FILTERNAME()] <<= sFilter; 533 sType = sRealType; 534 return; 535 } 536 } 537 catch(const css::uno::Exception&) 538 {} 539 } 540 541 // c) 542 // We can use the preferred filter for the specified type. 543 // Such preferred filter points: 544 // - to the default filter of the preferred application 545 // - or to any other filter if no preferred filter was set. 546 // Note: It's an optimization only! 547 // It's not guaranteed, that such preferred filter exists. 548 sFilter.clear(); 549 try 550 { 551 // SAFE -> 552 osl::ClearableMutexGuard aLock(m_aLock); 553 554 CacheItem aType = cache.getItem(FilterCache::E_TYPE, sType); 555 aType[PROPNAME_PREFERREDFILTER] >>= sFilter; 556 CacheItem aFilter = cache.getItem(FilterCache::E_FILTER, sFilter); 557 558 aLock.clear(); 559 // <- SAFE 560 561 // no exception => found valid type and filter => set it on the given descriptor 562 rDescriptor[utl::MediaDescriptor::PROP_TYPENAME() ] <<= sType ; 563 rDescriptor[utl::MediaDescriptor::PROP_FILTERNAME()] <<= sFilter; 564 return; 565 } 566 catch(const css::uno::Exception&) 567 {} 568 569 // d) 570 // Search for any import(!) filter, which is registered for this type. 571 sFilter.clear(); 572 try 573 { 574 // SAFE -> 575 ::osl::ResettableMutexGuard aLock(m_aLock); 576 577 // Attention: For executing next lines of code, We must be sure that 578 // all filters already loaded :-( 579 // That can disturb our "load on demand feature". But we have no other chance! 580 cache.load(FilterCache::E_CONTAINS_FILTERS); 581 582 CacheItem lIProps; 583 lIProps[PROPNAME_TYPE] <<= sType; 584 std::vector<OUString> lFilters = cache.getMatchingItemsByProps(FilterCache::E_FILTER, lIProps); 585 586 aLock.clear(); 587 // <- SAFE 588 589 for (auto const& filter : lFilters) 590 { 591 sFilter = filter; 592 593 // SAFE -> 594 aLock.reset(); 595 try 596 { 597 CacheItem aFilter = cache.getItem(FilterCache::E_FILTER, sFilter); 598 sal_Int32 nFlags = 0; 599 aFilter[PROPNAME_FLAGS] >>= nFlags; 600 601 if (static_cast<SfxFilterFlags>(nFlags) & SfxFilterFlags::IMPORT) 602 break; 603 } 604 catch(const css::uno::Exception&) 605 { continue; } 606 aLock.clear(); 607 // <- SAFE 608 609 sFilter.clear(); 610 } 611 612 if (!sFilter.isEmpty()) 613 { 614 rDescriptor[utl::MediaDescriptor::PROP_TYPENAME() ] <<= sType ; 615 rDescriptor[utl::MediaDescriptor::PROP_FILTERNAME()] <<= sFilter; 616 return; 617 } 618 } 619 catch(const css::uno::Exception&) 620 {} 621 } 622 623 624 bool TypeDetection::impl_getPreselectionForType( 625 const OUString& sPreSelType, const util::URL& aParsedURL, FlatDetection& rFlatTypes, bool bDocService) 626 { 627 // Can be used to suppress execution of some parts of this method 628 // if it's already clear that detected type is valid or not. 629 // It's necessary to use shared code at the end, which update 630 // all return parameters consistency! 631 bool bBreakDetection = false; 632 633 // Further we must know if it matches by pattern 634 // Every flat detected type by pattern won't be detected deep! 635 bool bMatchByPattern = false; 636 637 // And we must know if a preselection must be preferred, because 638 // it matches by its extension too. 639 bool bMatchByExtension = false; 640 641 // validate type 642 OUString sType(sPreSelType); 643 CacheItem aType; 644 try 645 { 646 // SAFE -> -------------------------- 647 osl::MutexGuard aLock(m_aLock); 648 aType = TheFilterCache::get().getItem(FilterCache::E_TYPE, sType); 649 // <- SAFE -------------------------- 650 } 651 catch(const css::container::NoSuchElementException&) 652 { 653 sType.clear(); 654 bBreakDetection = true; 655 } 656 657 if (!bBreakDetection) 658 { 659 // We can't check a preselected type for a given stream! 660 // So we must believe, that it can work ... 661 if ( aParsedURL.Complete == "private:stream" ) 662 bBreakDetection = true; 663 } 664 665 if (!bBreakDetection) 666 { 667 // extract extension from URL .. to check it case-insensitive ! 668 INetURLObject aParser (aParsedURL.Main); 669 OUString sExtension = aParser.getExtension(INetURLObject::LAST_SEGMENT , 670 true , 671 INetURLObject::DecodeMechanism::WithCharset); 672 sExtension = sExtension.toAsciiLowerCase(); 673 674 // otherwise we must know, if it matches to the given URL really. 675 // especially if it matches by its extension or pattern registration. 676 std::vector<OUString> lExtensions(comphelper::sequenceToContainer< std::vector<OUString> >(aType[PROPNAME_EXTENSIONS].get<css::uno::Sequence<OUString> >() )); 677 std::vector<OUString> lURLPattern(comphelper::sequenceToContainer< std::vector<OUString> >(aType[PROPNAME_URLPATTERN].get<css::uno::Sequence<OUString> >() )); 678 679 for (auto const& extension : lExtensions) 680 { 681 OUString sCheckExtension(extension.toAsciiLowerCase()); 682 if (sCheckExtension == sExtension) 683 { 684 bBreakDetection = true; 685 bMatchByExtension = true; 686 break; 687 } 688 } 689 690 if (!bBreakDetection) 691 { 692 for (auto const& elem : lURLPattern) 693 { 694 WildCard aCheck(elem); 695 if (aCheck.Matches(aParsedURL.Main)) 696 { 697 bMatchByPattern = true; 698 break; 699 } 700 } 701 } 702 } 703 704 // if it's a valid type - set it on all return values! 705 if (!sType.isEmpty()) 706 { 707 FlatDetection::iterator it = std::find_if(rFlatTypes.begin(), rFlatTypes.end(), FindByType(sType)); 708 if (it != rFlatTypes.end()) 709 { 710 if (bMatchByExtension) 711 it->bMatchByExtension = true; 712 if (bMatchByPattern) 713 it->bMatchByPattern = true; 714 if (bDocService) 715 it->bPreselectedByDocumentService = true; 716 } 717 718 return true; 719 } 720 721 // not valid! 722 return false; 723 } 724 725 void TypeDetection::impl_getPreselectionForDocumentService( 726 const OUString& sPreSelDocumentService, const util::URL& aParsedURL, FlatDetection& rFlatTypes) 727 { 728 // get all filters, which match to this doc service 729 std::vector<OUString> lFilters; 730 try 731 { 732 // SAFE -> -------------------------- 733 osl::MutexGuard aLock(m_aLock); 734 735 // Attention: For executing next lines of code, We must be sure that 736 // all filters already loaded :-( 737 // That can disturb our "load on demand feature". But we have no other chance! 738 auto & cache = TheFilterCache::get(); 739 cache.load(FilterCache::E_CONTAINS_FILTERS); 740 741 CacheItem lIProps; 742 lIProps[PROPNAME_DOCUMENTSERVICE] <<= sPreSelDocumentService; 743 lFilters = cache.getMatchingItemsByProps(FilterCache::E_FILTER, lIProps); 744 // <- SAFE -------------------------- 745 } 746 catch (const css::container::NoSuchElementException&) 747 { 748 lFilters.clear(); 749 } 750 751 // step over all filters, and check if its registered type 752 // match the given URL. 753 // But use temp. list of "preselected types" instead of incoming rFlatTypes list! 754 // The reason behind: we must filter the obtained results. And copying stl entries 755 // is an easier job than removing them .-) 756 for (auto const& filter : lFilters) 757 { 758 OUString aType = impl_getTypeFromFilter(filter); 759 if (aType.isEmpty()) 760 continue; 761 762 impl_getPreselectionForType(aType, aParsedURL, rFlatTypes, true); 763 } 764 } 765 766 OUString TypeDetection::impl_getTypeFromFilter(const OUString& rFilterName) 767 { 768 CacheItem aFilter; 769 try 770 { 771 osl::MutexGuard aLock(m_aLock); 772 aFilter = TheFilterCache::get().getItem(FilterCache::E_FILTER, rFilterName); 773 } 774 catch (const container::NoSuchElementException&) 775 { 776 return OUString(); 777 } 778 779 OUString aType; 780 aFilter[PROPNAME_TYPE] >>= aType; 781 return aType; 782 } 783 784 void TypeDetection::impl_getAllFormatTypes( 785 const util::URL& aParsedURL, utl::MediaDescriptor const & rDescriptor, FlatDetection& rFlatTypes) 786 { 787 rFlatTypes.clear(); 788 789 // Get all filters that we have. 790 std::vector<OUString> aFilterNames; 791 try 792 { 793 osl::MutexGuard aLock(m_aLock); 794 auto & cache = TheFilterCache::get(); 795 cache.load(FilterCache::E_CONTAINS_FILTERS); 796 aFilterNames = cache.getItemNames(FilterCache::E_FILTER); 797 } 798 catch (const container::NoSuchElementException&) 799 { 800 return; 801 } 802 803 // Retrieve the default type for each of these filters, and store them. 804 for (auto const& filterName : aFilterNames) 805 { 806 OUString aType = impl_getTypeFromFilter(filterName); 807 808 if (aType.isEmpty()) 809 continue; 810 811 FlatDetectionInfo aInfo; // all flags set to false by default. 812 aInfo.sType = aType; 813 rFlatTypes.push_back(aInfo); 814 } 815 816 { 817 // Get all types that match the URL alone. 818 FlatDetection aFlatByURL; 819 TheFilterCache::get().detectFlatForURL(aParsedURL, aFlatByURL); 820 for (auto const& elem : aFlatByURL) 821 { 822 FlatDetection::iterator itPos = std::find_if(rFlatTypes.begin(), rFlatTypes.end(), FindByType(elem.sType)); 823 if (itPos == rFlatTypes.end()) 824 // Not in the list yet. 825 rFlatTypes.push_back(elem); 826 else 827 { 828 // Already in the list. Update the flags. 829 FlatDetectionInfo& rInfo = *itPos; 830 const FlatDetectionInfo& rThisInfo = elem; 831 if (rThisInfo.bMatchByExtension) 832 rInfo.bMatchByExtension = true; 833 if (rThisInfo.bMatchByPattern) 834 rInfo.bMatchByPattern = true; 835 if (rThisInfo.bPreselectedByDocumentService) 836 rInfo.bPreselectedByDocumentService = true; 837 } 838 } 839 } 840 841 // Remove duplicates. 842 std::stable_sort(rFlatTypes.begin(), rFlatTypes.end(), SortByType()); 843 auto last = std::unique(rFlatTypes.begin(), rFlatTypes.end(), EqualByType()); 844 rFlatTypes.erase(last, rFlatTypes.end()); 845 846 // Mark pre-selected type (if any) to have it prioritized. 847 OUString sSelectedType = rDescriptor.getUnpackedValueOrDefault(utl::MediaDescriptor::PROP_TYPENAME(), OUString()); 848 if (!sSelectedType.isEmpty()) 849 impl_getPreselectionForType(sSelectedType, aParsedURL, rFlatTypes, false); 850 851 // Mark all types preferred by the current document service, to have it prioritized. 852 OUString sSelectedDoc = rDescriptor.getUnpackedValueOrDefault(utl::MediaDescriptor::PROP_DOCUMENTSERVICE(), OUString()); 853 if (!sSelectedDoc.isEmpty()) 854 impl_getPreselectionForDocumentService(sSelectedDoc, aParsedURL, rFlatTypes); 855 } 856 857 858 OUString TypeDetection::impl_detectTypeFlatAndDeep( utl::MediaDescriptor& rDescriptor , 859 const FlatDetection& lFlatTypes , 860 bool bAllowDeep , 861 std::vector<OUString>& rUsedDetectors, 862 OUString& rLastChance ) 863 { 864 // reset it everytimes, so the outside code can distinguish between 865 // a set and a not set value. 866 rLastChance.clear(); 867 rUsedDetectors.clear(); 868 869 // step over all possible types for this URL. 870 // solutions: 871 // a) no types => no detection 872 // b) deep detection not allowed => return first valid type of list (because it's the preferred or the first valid one) 873 // or(!) match by URLPattern => in such case a deep detection will be suppressed! 874 // c) type has no detect service => safe the first occurred type without a detect service 875 // as "last chance"(!). It will be used outside of this method 876 // if no further type could be detected. 877 // It must be the first one, because it can be a preferred type. 878 // Our types list was sorted by such criteria! 879 // d) detect service return a valid result => return its decision 880 // e) detect service return an invalid result 881 // or any needed information could not be 882 // obtained from the cache => ignore it, and continue with search 883 884 for (auto const& flatTypeInfo : lFlatTypes) 885 { 886 if (m_bCancel) 887 break; 888 OUString sFlatType = flatTypeInfo.sType; 889 890 if (!impl_validateAndSetTypeOnDescriptor(rDescriptor, sFlatType)) 891 continue; 892 893 // b) 894 if ( 895 (!bAllowDeep ) || 896 (flatTypeInfo.bMatchByPattern) 897 ) 898 { 899 return sFlatType; 900 } 901 902 try 903 { 904 // SAFE -> ---------------------------------- 905 osl::ClearableMutexGuard aLock(m_aLock); 906 CacheItem aType = TheFilterCache::get().getItem(FilterCache::E_TYPE, sFlatType); 907 aLock.clear(); 908 909 OUString sDetectService; 910 aType[PROPNAME_DETECTSERVICE] >>= sDetectService; 911 912 // c) 913 if (sDetectService.isEmpty()) 914 { 915 // flat detected types without any registered deep detection service and not 916 // preselected by the user can be used as LAST CHANCE in case no other type could 917 // be detected. Of course only the first type without deep detector can be used. 918 // Further ones has to be ignored. 919 if (rLastChance.isEmpty()) 920 rLastChance = sFlatType; 921 922 continue; 923 } 924 925 // don't forget to add every real asked deep detection service here. 926 // Such detectors will be ignored if may be "impl_detectTypeDeepOnly()" 927 // must be called later! 928 rUsedDetectors.push_back(sDetectService); 929 OUString sDeepType = impl_askDetectService(sDetectService, rDescriptor); 930 931 // d) 932 if (!sDeepType.isEmpty()) 933 return sDeepType; 934 } 935 catch(const css::container::NoSuchElementException&) 936 {} 937 // e) 938 } 939 940 return OUString(); 941 // <- SAFE ---------------------------------- 942 } 943 944 void TypeDetection::impl_seekStreamToZero(utl::MediaDescriptor const & rDescriptor) 945 { 946 // try to seek to 0 ... 947 // But because XSeekable is an optional interface ... try it only .-) 948 css::uno::Reference< css::io::XInputStream > xStream = rDescriptor.getUnpackedValueOrDefault( 949 utl::MediaDescriptor::PROP_INPUTSTREAM(), 950 css::uno::Reference< css::io::XInputStream >()); 951 css::uno::Reference< css::io::XSeekable > xSeek(xStream, css::uno::UNO_QUERY); 952 if (xSeek.is()) 953 { 954 try 955 { 956 xSeek->seek(0); 957 } 958 catch(const css::uno::RuntimeException&) 959 { 960 throw; 961 } 962 catch(const css::uno::Exception&) 963 { 964 } 965 } 966 } 967 968 OUString TypeDetection::impl_askDetectService(const OUString& sDetectService, 969 utl::MediaDescriptor& rDescriptor ) 970 { 971 // Open the stream and add it to the media descriptor if this method is called for the first time. 972 // All following requests to this method will detect, that there already exists a stream .-) 973 // Attention: This method throws an exception if the stream could not be opened. 974 // It's important to break any further detection in such case. 975 // Catch it on the highest detection level only !!! 976 impl_openStream(rDescriptor); 977 978 // seek to 0 is an optional feature to be more robust against 979 // "simple implemented detect services" .-) 980 impl_seekStreamToZero(rDescriptor); 981 982 css::uno::Reference< css::document::XExtendedFilterDetection > xDetector; 983 css::uno::Reference< css::uno::XComponentContext > xContext; 984 985 // SAFE -> 986 { 987 osl::MutexGuard aLock(m_aLock); 988 xContext = m_xContext; 989 } 990 // <- SAFE 991 992 try 993 { 994 // Attention! If e.g. an office module was not installed sometimes we 995 // find a registered detect service, which is referred inside the 996 // configuration ... but not really installed. On the other side we use 997 // third party components here, which can make trouble anyway. So we 998 // should handle errors during creation of such services more 999 // gracefully .-) 1000 xDetector.set( 1001 xContext->getServiceManager()->createInstanceWithContext(sDetectService, xContext), 1002 css::uno::UNO_QUERY_THROW); 1003 } 1004 catch (...) 1005 { 1006 } 1007 1008 if ( ! xDetector.is()) 1009 return OUString(); 1010 1011 OUString sDeepType; 1012 try 1013 { 1014 // start deep detection 1015 // Don't forget to convert stl descriptor to its uno representation. 1016 1017 /* Attention! 1018 You have to use an explicit instance of this uno sequence... 1019 Because it's used as an in out parameter. And in case of a temp. used object 1020 we will run into memory corruptions! 1021 */ 1022 css::uno::Sequence< css::beans::PropertyValue > lDescriptor; 1023 rDescriptor >> lDescriptor; 1024 sDeepType = xDetector->detect(lDescriptor); 1025 rDescriptor << lDescriptor; 1026 } 1027 catch (...) 1028 { 1029 // We should ignore errors here. 1030 // Thrown exceptions mostly will end in crash recovery... 1031 // But might be we find another deep detection service which can detect the same 1032 // document without a problem .-) 1033 sDeepType.clear(); 1034 } 1035 1036 // seek to 0 is an optional feature to be more robust against 1037 // "simple implemented detect services" .-) 1038 impl_seekStreamToZero(rDescriptor); 1039 1040 // analyze the results 1041 // a) detect service returns "" => return "" too and remove TYPE/FILTER prop from descriptor 1042 // b) returned type is unknown => return "" too and remove TYPE/FILTER prop from descriptor 1043 // c) returned type is valid => check TYPE/FILTER props inside descriptor and return the type 1044 1045 // this special helper checks for a valid type 1046 // and set right values on the descriptor! 1047 bool bValidType = impl_validateAndSetTypeOnDescriptor(rDescriptor, sDeepType); 1048 if (bValidType) 1049 return sDeepType; 1050 1051 return OUString(); 1052 } 1053 1054 1055 OUString TypeDetection::impl_askUserForTypeAndFilterIfAllowed(utl::MediaDescriptor& rDescriptor) 1056 { 1057 css::uno::Reference< css::task::XInteractionHandler > xInteraction = 1058 rDescriptor.getUnpackedValueOrDefault(utl::MediaDescriptor::PROP_INTERACTIONHANDLER(), 1059 css::uno::Reference< css::task::XInteractionHandler >()); 1060 1061 if (!xInteraction.is()) 1062 return OUString(); 1063 1064 OUString sURL = 1065 rDescriptor.getUnpackedValueOrDefault(utl::MediaDescriptor::PROP_URL(), 1066 OUString()); 1067 1068 css::uno::Reference< css::io::XInputStream > xStream = 1069 rDescriptor.getUnpackedValueOrDefault(utl::MediaDescriptor::PROP_INPUTSTREAM(), 1070 css::uno::Reference< css::io::XInputStream >()); 1071 1072 // Don't disturb the user for "non existing files - means empty URLs" or 1073 // if we were forced to detect a stream. 1074 // Reason behind: we must be sure to ask user for "unknown contents" only... 1075 // and not for "missing files". Especially if detection is done by a stream only 1076 // we can't check if the stream points to an "existing content"! 1077 if ( 1078 (sURL.isEmpty() ) || // "non existing file" ? 1079 (!xStream.is() ) || // non existing file ! 1080 (sURL.equalsIgnoreAsciiCase("private:stream")) // not a good idea .-) 1081 ) 1082 return OUString(); 1083 1084 try 1085 { 1086 // create a new request to ask user for its decision about the usable filter 1087 ::framework::RequestFilterSelect aRequest(sURL); 1088 xInteraction->handle(aRequest.GetRequest()); 1089 1090 // "Cancel" pressed? => return with error 1091 if (aRequest.isAbort()) 1092 return OUString(); 1093 1094 // "OK" pressed => verify the selected filter, get its corresponding 1095 // type and return it. (BTW: We must update the media descriptor here ...) 1096 // The user selected explicitly a filter ... but normally we are interested on 1097 // a type here only. But we must be sure, that the selected filter is used 1098 // too and no ambiguous filter registration disturb us .-) 1099 1100 OUString sFilter = aRequest.getFilter(); 1101 if (!impl_validateAndSetFilterOnDescriptor(rDescriptor, sFilter)) 1102 return OUString(); 1103 1104 OUString sType; 1105 rDescriptor[utl::MediaDescriptor::PROP_TYPENAME()] >>= sType; 1106 return sType; 1107 } 1108 catch(const css::uno::Exception&) 1109 {} 1110 1111 return OUString(); 1112 } 1113 1114 1115 void TypeDetection::impl_openStream(utl::MediaDescriptor& rDescriptor) 1116 { 1117 bool bSuccess = false; 1118 OUString sURL = rDescriptor.getUnpackedValueOrDefault( utl::MediaDescriptor::PROP_URL(), OUString() ); 1119 bool bRequestedReadOnly = rDescriptor.getUnpackedValueOrDefault( utl::MediaDescriptor::PROP_READONLY(), false ); 1120 if ( comphelper::isFileUrl( sURL ) ) 1121 { 1122 // OOo uses own file locking mechanics in case of local file 1123 bSuccess = rDescriptor.addInputStreamOwnLock(); 1124 } 1125 else 1126 bSuccess = rDescriptor.addInputStream(); 1127 1128 if ( !bSuccess ) 1129 throw css::uno::Exception( 1130 "Could not open stream for <" + sURL + ">", 1131 static_cast<OWeakObject *>(this)); 1132 1133 if ( !bRequestedReadOnly ) 1134 { 1135 // The MediaDescriptor implementation adds ReadOnly argument if the file can not be opened for writing 1136 // this argument should be either removed or an additional argument should be added so that application 1137 // can separate the case when the user explicitly requests readonly document. 1138 // The current solution is to remove it here. 1139 rDescriptor.erase( utl::MediaDescriptor::PROP_READONLY() ); 1140 } 1141 } 1142 1143 1144 void TypeDetection::impl_removeTypeFilterFromDescriptor(utl::MediaDescriptor& rDescriptor) 1145 { 1146 utl::MediaDescriptor::iterator pItType = rDescriptor.find(utl::MediaDescriptor::PROP_TYPENAME() ); 1147 utl::MediaDescriptor::iterator pItFilter = rDescriptor.find(utl::MediaDescriptor::PROP_FILTERNAME()); 1148 if (pItType != rDescriptor.end()) 1149 rDescriptor.erase(pItType); 1150 if (pItFilter != rDescriptor.end()) 1151 rDescriptor.erase(pItFilter); 1152 } 1153 1154 1155 bool TypeDetection::impl_validateAndSetTypeOnDescriptor( utl::MediaDescriptor& rDescriptor, 1156 const OUString& sType ) 1157 { 1158 // SAFE -> 1159 { 1160 osl::MutexGuard aLock(m_aLock); 1161 if (TheFilterCache::get().hasItem(FilterCache::E_TYPE, sType)) 1162 { 1163 rDescriptor[utl::MediaDescriptor::PROP_TYPENAME()] <<= sType; 1164 return true; 1165 } 1166 } 1167 // <- SAFE 1168 1169 // remove all related information from the descriptor 1170 impl_removeTypeFilterFromDescriptor(rDescriptor); 1171 return false; 1172 } 1173 1174 1175 bool TypeDetection::impl_validateAndSetFilterOnDescriptor( utl::MediaDescriptor& rDescriptor, 1176 const OUString& sFilter ) 1177 { 1178 try 1179 { 1180 // SAFE -> 1181 osl::ClearableMutexGuard aLock(m_aLock); 1182 1183 auto & cache = TheFilterCache::get(); 1184 CacheItem aFilter = cache.getItem(FilterCache::E_FILTER, sFilter); 1185 OUString sType; 1186 aFilter[PROPNAME_TYPE] >>= sType; 1187 1188 aLock.clear(); 1189 // <- SAFE 1190 1191 // found valid type and filter => set it on the given descriptor 1192 rDescriptor[utl::MediaDescriptor::PROP_TYPENAME() ] <<= sType ; 1193 rDescriptor[utl::MediaDescriptor::PROP_FILTERNAME()] <<= sFilter; 1194 return true; 1195 } 1196 catch(const css::container::NoSuchElementException&){} 1197 1198 // remove all related information from the descriptor 1199 impl_removeTypeFilterFromDescriptor(rDescriptor); 1200 return false; 1201 } 1202 1203 1204 OUString TypeDetection::impl_getImplementationName() 1205 { 1206 return "com.sun.star.comp.filter.config.TypeDetection"; 1207 } 1208 1209 1210 css::uno::Sequence< OUString > TypeDetection::impl_getSupportedServiceNames() 1211 { 1212 return { "com.sun.star.document.TypeDetection" }; 1213 } 1214 1215 1216 css::uno::Reference< css::uno::XInterface > TypeDetection::impl_createInstance(const css::uno::Reference< css::lang::XMultiServiceFactory >& xSMGR) 1217 { 1218 TypeDetection* pNew = new TypeDetection( comphelper::getComponentContext(xSMGR) ); 1219 return css::uno::Reference< css::uno::XInterface >(static_cast< css::document::XTypeDetection* >(pNew), css::uno::UNO_QUERY); 1220 } 1221 1222 } // namespace config 1223 } // namespace filter 1224 1225 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ 1226
