1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
4 *
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 *
9 * This file incorporates work covered by the following license notice:
10 *
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 */
19
20 #include <oox/core/filterdetect.hxx>
21
22 #include <com/sun/star/io/XStream.hpp>
23 #include <comphelper/docpasswordhelper.hxx>
24 #include <comphelper/memorystream.hxx>
25 #include <unotools/mediadescriptor.hxx>
26 #include <cppuhelper/supportsservice.hxx>
27
28 #include <oox/core/fastparser.hxx>
29 #include <oox/helper/attributelist.hxx>
30 #include <oox/helper/zipstorage.hxx>
31 #include <oox/ole/olestorage.hxx>
32 #include <oox/token/namespaces.hxx>
33 #include <oox/token/tokens.hxx>
34
35 #include <oox/crypto/DocumentDecryption.hxx>
36
37 #include <com/sun/star/uri/UriReferenceFactory.hpp>
38 #include <com/sun/star/beans/NamedValue.hpp>
39 #include <o3tl/string_view.hxx>
40 #include <utility>
41
42 using namespace ::com::sun::star;
43
44 namespace oox::core {
45
46 using namespace ::com::sun::star::beans;
47 using namespace ::com::sun::star::io;
48 using namespace ::com::sun::star::lang;
49 using namespace ::com::sun::star::uno;
50 using namespace ::com::sun::star::xml::sax;
51 using namespace ::com::sun::star::uri;
52
53 using utl::MediaDescriptor;
54 using comphelper::IDocPasswordVerifier;
55 using comphelper::DocPasswordVerifierResult;
56
FilterDetectDocHandler(const Reference<XComponentContext> & rxContext,OUString & rFilterName,OUString aFileName)57 FilterDetectDocHandler::FilterDetectDocHandler( const Reference< XComponentContext >& rxContext, OUString& rFilterName, OUString aFileName ) :
58 mrFilterName( rFilterName ),
59 maFileName(std::move(aFileName)),
60 maOOXMLVariant( OOXMLVariant::ECMA_Transitional ),
61 mxContext( rxContext )
62 {
63 maContextStack.reserve( 2 );
64 }
65
~FilterDetectDocHandler()66 FilterDetectDocHandler::~FilterDetectDocHandler()
67 {
68 }
69
startDocument()70 void SAL_CALL FilterDetectDocHandler::startDocument()
71 {
72 }
73
endDocument()74 void SAL_CALL FilterDetectDocHandler::endDocument()
75 {
76 }
77
processingInstruction(const OUString &,const OUString &)78 void SAL_CALL FilterDetectDocHandler::processingInstruction( const OUString& /*rTarget*/, const OUString& /*rData*/ )
79 {
80 }
81
setDocumentLocator(const Reference<XLocator> &)82 void SAL_CALL FilterDetectDocHandler::setDocumentLocator( const Reference<XLocator>& /*xLocator*/ )
83 {
84 }
85
startFastElement(sal_Int32 nElement,const Reference<XFastAttributeList> & rAttribs)86 void SAL_CALL FilterDetectDocHandler::startFastElement(
87 sal_Int32 nElement, const Reference< XFastAttributeList >& rAttribs )
88 {
89 AttributeList aAttribs( rAttribs );
90 switch ( nElement )
91 {
92 // cases for word/settings.xml
93 case W_TOKEN(settings):
94 case W_TOKEN(compat):
95 break;
96 case W_TOKEN(compatSetting):
97 if (!maContextStack.empty() && (maContextStack.back() == W_TOKEN(compat)))
98 parseSettings(aAttribs);
99 break;
100
101 // cases for _rels/.rels
102 case PR_TOKEN( Relationships ):
103 break;
104 case PR_TOKEN( Relationship ):
105 if( !maContextStack.empty() && (maContextStack.back() == PR_TOKEN( Relationships )) )
106 parseRelationship( aAttribs );
107 break;
108
109 // cases for [Content_Types].xml
110 case PC_TOKEN( Types ):
111 break;
112 case PC_TOKEN( Default ):
113 if( !maContextStack.empty() && (maContextStack.back() == PC_TOKEN( Types )) )
114 parseContentTypesDefault( aAttribs );
115 break;
116 case PC_TOKEN( Override ):
117 if( !maContextStack.empty() && (maContextStack.back() == PC_TOKEN( Types )) )
118 parseContentTypesOverride( aAttribs );
119 break;
120 }
121 maContextStack.push_back( nElement );
122 }
123
startUnknownElement(const OUString &,const OUString &,const Reference<XFastAttributeList> &)124 void SAL_CALL FilterDetectDocHandler::startUnknownElement(
125 const OUString& /*Namespace*/, const OUString& /*Name*/, const Reference<XFastAttributeList>& /*Attribs*/ )
126 {
127 }
128
endFastElement(sal_Int32)129 void SAL_CALL FilterDetectDocHandler::endFastElement( sal_Int32 /*nElement*/ )
130 {
131 maContextStack.pop_back();
132 }
133
endUnknownElement(const OUString &,const OUString &)134 void SAL_CALL FilterDetectDocHandler::endUnknownElement(
135 const OUString& /*Namespace*/, const OUString& /*Name*/ )
136 {
137 }
138
createFastChildContext(sal_Int32,const Reference<XFastAttributeList> &)139 Reference<XFastContextHandler> SAL_CALL FilterDetectDocHandler::createFastChildContext(
140 sal_Int32 /*Element*/, const Reference<XFastAttributeList>& /*Attribs*/ )
141 {
142 return this;
143 }
144
/** This handler also serves as the context for unknown child elements.

    @return this handler itself.
 */
Reference<XFastContextHandler> SAL_CALL FilterDetectDocHandler::createUnknownChildContext(
    const OUString& /*Namespace*/,
    const OUString& /*Name*/,
    const Reference<XFastAttributeList>& /*Attribs*/ )
{
    return this;
}
150
characters(const OUString &)151 void SAL_CALL FilterDetectDocHandler::characters( const OUString& /*aChars*/ )
152 {
153 }
154
parseSettings(const AttributeList & rAttribs)155 void FilterDetectDocHandler::parseSettings(const AttributeList& rAttribs)
156 {
157 // tdf#131936 Remember filter when opening file as 'Office Open XML Text'
158 if (rAttribs.getStringDefaulted(W_TOKEN(name)).equalsIgnoreAsciiCase("compatibilityMode"))
159 {
160 const sal_Int32 nVal = rAttribs.getInteger(W_TOKEN(val), 12); // default to Word 2007
161 // if specified multiple times, highest value wins
162 if (nVal > 12 && maOOXMLVariant == OOXMLVariant::ECMA_Transitional)
163 maOOXMLVariant = OOXMLVariant::ISO_Transitional; // Word 2010+
164 }
165 }
166
parseRelationship(const AttributeList & rAttribs)167 void FilterDetectDocHandler::parseRelationship( const AttributeList& rAttribs )
168 {
169 OUString aType = rAttribs.getStringDefaulted( XML_Type);
170
171 // tdf#131936 Remember filter when opening file as 'Office Open XML Text'
172 if (aType.startsWithIgnoreAsciiCase("http://schemas.openxmlformats.org/officedocument/2006/relationships/metadata/core-properties"))
173 maOOXMLVariant = OOXMLVariant::ISO_Transitional;
174 else if (aType.startsWithIgnoreAsciiCase("http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties"))
175 maOOXMLVariant = OOXMLVariant::ECMA_Transitional;
176 else if (aType.startsWithIgnoreAsciiCase("http://purl.oclc.org/ooxml/officeDocument"))
177 maOOXMLVariant = OOXMLVariant::ISO_Strict;
178
179 if ( aType != "http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument" // OOXML Transitional
180 && aType != "http://purl.oclc.org/ooxml/officeDocument/relationships/officeDocument" ) //OOXML strict
181 return;
182
183 Reference<XUriReferenceFactory> xFactory = UriReferenceFactory::create( mxContext );
184 try
185 {
186 // use '/' to represent the root of the zip package ( and provide a 'file' scheme to
187 // keep the XUriReference implementation happy )
188 Reference< XUriReference > xBase = xFactory->parse( u"file:///"_ustr );
189
190 Reference< XUriReference > xPart = xFactory->parse( rAttribs.getStringDefaulted( XML_Target) );
191 Reference< XUriReference > xAbs = xFactory->makeAbsolute( xBase, xPart, true, RelativeUriExcessParentSegments_RETAIN );
192
193 if ( xAbs.is() )
194 maTargetPath = xAbs->getPath();
195 }
196 catch( const Exception& )
197 {
198 }
199 }
200
getFilterNameFromContentType(std::u16string_view rContentType,std::u16string_view rFileName)201 OUString FilterDetectDocHandler::getFilterNameFromContentType( std::u16string_view rContentType, std::u16string_view rFileName )
202 {
203 bool bDocm = o3tl::endsWithIgnoreAsciiCase(rFileName, ".docm");
204
205 if( rContentType == u"application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml" && !bDocm )
206 {
207 switch (maOOXMLVariant)
208 {
209 case OOXMLVariant::ISO_Transitional:
210 case OOXMLVariant::ISO_Strict: // Not supported, map to ISO transitional
211 return u"writer_OOXML"_ustr;
212 case OOXMLVariant::ECMA_Transitional:
213 return u"writer_MS_Word_2007"_ustr;
214 }
215 }
216
217 if( rContentType == u"application/vnd.ms-word.document.macroEnabled.main+xml" || bDocm )
218 return u"writer_MS_Word_2007_VBA"_ustr;
219
220 if( rContentType == u"application/vnd.openxmlformats-officedocument.wordprocessingml.template.main+xml" ||
221 rContentType == u"application/vnd.ms-word.template.macroEnabledTemplate.main+xml" )
222 {
223 switch (maOOXMLVariant)
224 {
225 case OOXMLVariant::ISO_Transitional:
226 case OOXMLVariant::ISO_Strict: // Not supported, map to ISO transitional
227 return u"writer_OOXML_Text_Template"_ustr;
228 case OOXMLVariant::ECMA_Transitional:
229 return u"writer_MS_Word_2007_Template"_ustr;
230 }
231 }
232
233 if( rContentType == u"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml")
234 return u"MS Excel 2007 XML"_ustr;
235
236 if (rContentType == u"application/vnd.ms-excel.sheet.macroEnabled.main+xml")
237 return u"MS Excel 2007 VBA XML"_ustr;
238
239 if( rContentType == u"application/vnd.openxmlformats-officedocument.spreadsheetml.template.main+xml" ||
240 rContentType == u"application/vnd.ms-excel.template.macroEnabled.main+xml" )
241 return u"MS Excel 2007 XML Template"_ustr;
242
243 if ( rContentType == u"application/vnd.ms-excel.sheet.binary.macroEnabled.main" )
244 return u"MS Excel 2007 Binary"_ustr;
245
246 if (rContentType == u"application/vnd.openxmlformats-officedocument.presentationml.presentation.main+xml")
247 return u"MS PowerPoint 2007 XML"_ustr;
248
249 if (rContentType == u"application/vnd.ms-powerpoint.presentation.macroEnabled.main+xml")
250 return u"MS PowerPoint 2007 XML VBA"_ustr;
251
252 if( rContentType == u"application/vnd.openxmlformats-officedocument.presentationml.slideshow.main+xml" ||
253 rContentType == u"application/vnd.ms-powerpoint.slideshow.macroEnabled.main+xml" )
254 return u"MS PowerPoint 2007 XML AutoPlay"_ustr;
255
256 if( rContentType == u"application/vnd.openxmlformats-officedocument.presentationml.template.main+xml" ||
257 rContentType == u"application/vnd.ms-powerpoint.template.macroEnabled.main+xml" )
258 return u"MS PowerPoint 2007 XML Template"_ustr;
259
260 return OUString();
261 }
262
parseContentTypesDefault(const AttributeList & rAttribs)263 void FilterDetectDocHandler::parseContentTypesDefault( const AttributeList& rAttribs )
264 {
265 // only if no overridden part name found
266 if( mrFilterName.isEmpty() )
267 {
268 // check if target path ends with extension
269 OUString aExtension = rAttribs.getStringDefaulted( XML_Extension);
270 sal_Int32 nExtPos = maTargetPath.getLength() - aExtension.getLength();
271 if( (nExtPos > 0) && (maTargetPath[ nExtPos - 1 ] == '.') && maTargetPath.match( aExtension, nExtPos ) )
272 mrFilterName = getFilterNameFromContentType( rAttribs.getStringDefaulted( XML_ContentType), maFileName );
273 }
274 }
275
parseContentTypesOverride(const AttributeList & rAttribs)276 void FilterDetectDocHandler::parseContentTypesOverride( const AttributeList& rAttribs )
277 {
278 if( rAttribs.getStringDefaulted( XML_PartName) == maTargetPath )
279 mrFilterName = getFilterNameFromContentType( rAttribs.getStringDefaulted( XML_ContentType), maFileName );
280 }
281
FilterDetect(const Reference<XComponentContext> & rxContext)282 FilterDetect::FilterDetect( const Reference< XComponentContext >& rxContext ) :
283 mxContext( rxContext, UNO_SET_THROW )
284 {
285 }
286
~FilterDetect()287 FilterDetect::~FilterDetect()
288 {
289 }
290
291 namespace
292 {
293
lclIsZipPackage(const Reference<XComponentContext> & rxContext,const Reference<XInputStream> & rxInStrm,bool bRepairPackage)294 bool lclIsZipPackage( const Reference< XComponentContext >& rxContext, const Reference< XInputStream >& rxInStrm, bool bRepairPackage )
295 {
296 ZipStorage aZipStorage(rxContext, rxInStrm, bRepairPackage);
297 return aZipStorage.isStorage();
298 }
299
300 class PasswordVerifier : public IDocPasswordVerifier
301 {
302 public:
303 explicit PasswordVerifier( crypto::DocumentDecryption& aDecryptor );
304
305 virtual DocPasswordVerifierResult verifyPassword( const OUString& rPassword, Sequence<NamedValue>& rEncryptionData ) override;
306
307 virtual DocPasswordVerifierResult verifyEncryptionData( const Sequence<NamedValue>& rEncryptionData ) override;
308 private:
309 crypto::DocumentDecryption& mDecryptor;
310 };
311
PasswordVerifier(crypto::DocumentDecryption & aDecryptor)312 PasswordVerifier::PasswordVerifier( crypto::DocumentDecryption& aDecryptor ) :
313 mDecryptor(aDecryptor)
314 {}
315
verifyPassword(const OUString & rPassword,Sequence<NamedValue> & rEncryptionData)316 comphelper::DocPasswordVerifierResult PasswordVerifier::verifyPassword( const OUString& rPassword, Sequence<NamedValue>& rEncryptionData )
317 {
318 try
319 {
320 if (mDecryptor.generateEncryptionKey(rPassword))
321 rEncryptionData = mDecryptor.createEncryptionData(rPassword);
322 }
323 catch (...)
324 {
325 // Any exception is a reason to abort
326 return comphelper::DocPasswordVerifierResult::Abort;
327 }
328
329 return rEncryptionData.hasElements() ? comphelper::DocPasswordVerifierResult::OK : comphelper::DocPasswordVerifierResult::WrongPassword;
330 }
331
verifyEncryptionData(const Sequence<NamedValue> &)332 comphelper::DocPasswordVerifierResult PasswordVerifier::verifyEncryptionData( const Sequence<NamedValue>& )
333 {
334 return comphelper::DocPasswordVerifierResult::WrongPassword;
335 }
336
337 } // namespace
338
extractUnencryptedPackage(MediaDescriptor & rMediaDescriptor) const339 Reference< XInputStream > FilterDetect::extractUnencryptedPackage( MediaDescriptor& rMediaDescriptor ) const
340 {
341 const bool bRepairPackage(rMediaDescriptor.getUnpackedValueOrDefault(u"RepairPackage"_ustr, false));
342 // try the plain input stream
343 Reference<XInputStream> xInputStream( rMediaDescriptor[ MediaDescriptor::PROP_INPUTSTREAM ], UNO_QUERY );
344 if (!xInputStream.is() || lclIsZipPackage(mxContext, xInputStream, bRepairPackage))
345 return xInputStream;
346
347 // check if a temporary file is passed in the 'ComponentData' property
348 Reference<XStream> xDecrypted( rMediaDescriptor.getComponentDataEntry( u"DecryptedPackage"_ustr ), UNO_QUERY );
349 if( xDecrypted.is() )
350 {
351 Reference<XInputStream> xDecryptedInputStream = xDecrypted->getInputStream();
352 if (lclIsZipPackage(mxContext, xDecryptedInputStream, bRepairPackage))
353 return xDecryptedInputStream;
354 }
355
356 // try to decrypt an encrypted OLE package
357 oox::ole::OleStorage aOleStorage( mxContext, xInputStream, false );
358 if( aOleStorage.isStorage() )
359 {
360 try
361 {
362 crypto::DocumentDecryption aDecryptor(mxContext, aOleStorage);
363
364 if( aDecryptor.readEncryptionInfo() )
365 {
366 /* "VelvetSweatshop" is the built-in default encryption
367 password used by MS Excel for the "workbook protection"
368 feature with password. Try this first before prompting the
369 user for a password. */
370 std::vector<OUString> aDefaultPasswords;
371 aDefaultPasswords.emplace_back("VelvetSweatshop");
372
373 /* Use the comphelper password helper to request a password.
374 This helper returns either with the correct password
375 (according to the verifier), or with an empty string if
376 user has cancelled the password input dialog. */
377 PasswordVerifier aVerifier( aDecryptor );
378 Sequence<NamedValue> aEncryptionData = rMediaDescriptor.requestAndVerifyDocPassword(
379 aVerifier,
380 comphelper::DocPasswordRequestType::MS,
381 &aDefaultPasswords );
382
383 if( !aEncryptionData.hasElements() )
384 {
385 rMediaDescriptor[ MediaDescriptor::PROP_ABORTED ] <<= true;
386 }
387 else
388 {
389 // create MemoryStream for unencrypted package - rather not put this in a tempfile
390 rtl::Reference< comphelper::UNOMemoryStream > xTempStream = new comphelper::UNOMemoryStream();
391
392 // if decryption was unsuccessful (corrupted file or any other reason)
393 if (!aDecryptor.decrypt(xTempStream))
394 {
395 rMediaDescriptor[ MediaDescriptor::PROP_ABORTED ] <<= true;
396 }
397 else
398 {
399 // store temp file in media descriptor to keep it alive
400 rMediaDescriptor.setComponentDataEntry( u"DecryptedPackage"_ustr, Any( Reference<XStream>(xTempStream) ) );
401
402 Reference<XInputStream> xDecryptedInputStream = xTempStream->getInputStream();
403 if (lclIsZipPackage(mxContext, xDecryptedInputStream, bRepairPackage))
404 return xDecryptedInputStream;
405 }
406 }
407 }
408 }
409 catch( const Exception& )
410 {
411 }
412 }
413 return Reference<XInputStream>();
414 }
415
416 // com.sun.star.lang.XServiceInfo interface -----------------------------------
417
getImplementationName()418 OUString SAL_CALL FilterDetect::getImplementationName()
419 {
420 return u"com.sun.star.comp.oox.FormatDetector"_ustr;
421 }
422
supportsService(const OUString & rServiceName)423 sal_Bool SAL_CALL FilterDetect::supportsService( const OUString& rServiceName )
424 {
425 return cppu::supportsService(this, rServiceName);
426 }
427
getSupportedServiceNames()428 Sequence< OUString > SAL_CALL FilterDetect::getSupportedServiceNames()
429 {
430 return { u"com.sun.star.frame.ExtendedTypeDetection"_ustr };
431 }
432
433 // com.sun.star.document.XExtendedFilterDetection interface -------------------
434
detect(Sequence<PropertyValue> & rMediaDescSeq)435 OUString SAL_CALL FilterDetect::detect( Sequence< PropertyValue >& rMediaDescSeq )
436 {
437 OUString aFilterName;
438 MediaDescriptor aMediaDescriptor( rMediaDescSeq );
439
440 try
441 {
442 aMediaDescriptor.addInputStream();
443
444 /* Get the unencrypted input stream. This may include creation of a
445 temporary file that contains the decrypted package. This temporary
446 file will be stored in the 'ComponentData' property of the media
447 descriptor. */
448 Reference< XInputStream > xInputStream( extractUnencryptedPackage( aMediaDescriptor ), UNO_SET_THROW );
449
450 // stream must be a ZIP package
451 ZipStorage aZipStorage(mxContext, xInputStream,
452 aMediaDescriptor.getUnpackedValueOrDefault(u"RepairPackage"_ustr, false));
453 if( aZipStorage.isStorage() )
454 {
455 // create the fast parser, register the XML namespaces, set document handler
456 FastParser aParser;
457 aParser.registerNamespace( NMSP_packageRel );
458 aParser.registerNamespace( NMSP_officeRel );
459 aParser.registerNamespace( NMSP_packageContentTypes );
460 aParser.registerNamespace(NMSP_doc); // for W_TOKEN
461
462 OUString aFileName;
463 aMediaDescriptor[utl::MediaDescriptor::PROP_URL] >>= aFileName;
464
465 aParser.setDocumentHandler( new FilterDetectDocHandler( mxContext, aFilterName, aFileName ) );
466
467 /* Parse '_rels/.rels' to get the target path and '[Content_Types].xml'
468 to determine the content type of the part at the target path. */
469 aParser.parseStream( aZipStorage, u"_rels/.rels"_ustr );
470 try
471 {
472 // Text documents can't use .rels to determine maOOXMLVariant. Use compatibilityMode
473 aParser.parseStream(aZipStorage, u"word/settings.xml"_ustr);
474 }
475 catch(const Exception&)
476 {
477 // not a MS Word text document, or file might not exist
478 }
479 // Order is critical: .rels and then settings.xml must be parsed before [Content_Types]
480 aParser.parseStream( aZipStorage, u"[Content_Types].xml"_ustr );
481 }
482 }
483 catch( const Exception& )
484 {
485 if ( aMediaDescriptor.getUnpackedValueOrDefault( MediaDescriptor::PROP_ABORTED, false ) )
486 /* The user chose to abort detection, e.g. by hitting 'Cancel' in the password input dialog,
487 so we have to return non-empty type name to abort the detection loop. The loading code is
488 supposed to check whether the "Aborted" flag is present in the descriptor, and to not attempt
489 to actually load the file then.
490
491 The returned type name is the one we got as an input, which typically was detected by the flat
492 detection (i.e. by file extension), so normally that's the correct one. Also at this point we
493 already know that the file is OLE encrypted package, so trying with other type detectors doesn't
494 make much sense anyway.
495 */
496 aFilterName = aMediaDescriptor.getUnpackedValueOrDefault( MediaDescriptor::PROP_TYPENAME, OUString() );
497 }
498
499 // write back changed media descriptor members
500 aMediaDescriptor >> rMediaDescSeq;
501 return aFilterName;
502 }
503
504 } // namespace oox::core
505
506 extern "C" SAL_DLLPUBLIC_EXPORT uno::XInterface*
com_sun_star_comp_oox_FormatDetector_get_implementation(uno::XComponentContext * pCtx,uno::Sequence<uno::Any> const &)507 com_sun_star_comp_oox_FormatDetector_get_implementation(uno::XComponentContext* pCtx,
508 uno::Sequence<uno::Any> const& /*rSeq*/)
509 {
510 return cppu::acquire(new oox::core::FilterDetect(pCtx));
511 }
512
513 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
514