xref: /core/sdext/source/pdfimport/filterdet.cxx (revision 07b673a1)
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  * This file incorporates work covered by the following license notice:
10  *
11  *   Licensed to the Apache Software Foundation (ASF) under one or more
12  *   contributor license agreements. See the NOTICE file distributed
13  *   with this work for additional information regarding copyright
14  *   ownership. The ASF licenses this file to you under the Apache
15  *   License, Version 2.0 (the "License"); you may not use this file
16  *   except in compliance with the License. You may obtain a copy of
17  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
18  */
19 
20 
21 #include "filterdet.hxx"
22 #include "inc/pdfihelper.hxx"
23 #include "inc/pdfparse.hxx"
24 
25 #include <osl/file.h>
26 #include <osl/thread.h>
27 #include <rtl/digest.h>
28 #include <sal/log.hxx>
29 #include <com/sun/star/io/IOException.hpp>
30 #include <com/sun/star/io/XInputStream.hpp>
31 #include <com/sun/star/io/XStream.hpp>
32 #include <com/sun/star/io/XSeekable.hpp>
33 #include <com/sun/star/io/TempFile.hpp>
34 #include <com/sun/star/task/XInteractionHandler.hpp>
35 #include <comphelper/fileurl.hxx>
36 #include <comphelper/hash.hxx>
37 #include <cppuhelper/supportsservice.hxx>
38 #include <comphelper/diagnose_ex.hxx>
39 #include <tools/stream.hxx>
40 #include <memory>
41 #include <utility>
42 #include <string.h>
43 
44 using namespace com::sun::star;
45 
46 namespace pdfi
47 {
48 
49 // TODO(T3): locking/thread safety
50 
51 namespace {
52 
53 class FileEmitContext : public pdfparse::EmitContext
54 {
55 private:
56     oslFileHandle                        m_aReadHandle;
57     unsigned int                         m_nReadLen;
58     uno::Reference< io::XStream >        m_xContextStream;
59     uno::Reference< io::XSeekable >      m_xSeek;
60     uno::Reference< io::XOutputStream >  m_xOut;
61 
62 public:
63     FileEmitContext( const OUString&                            rOrigFile,
64                      const uno::Reference< uno::XComponentContext >& xContext,
65                      const pdfparse::PDFContainer*                   pTop );
66     virtual ~FileEmitContext() override;
67 
68     virtual bool         write( const void* pBuf, unsigned int nLen ) override;
69     virtual unsigned int getCurPos() override;
70     virtual bool         copyOrigBytes( unsigned int nOrigOffset, unsigned int nLen ) override;
71     virtual unsigned int readOrigBytes( unsigned int nOrigOffset, unsigned int nLen, void* pBuf ) override;
72 
getContextStream() const73     const uno::Reference< io::XStream >& getContextStream() const { return m_xContextStream; }
74 };
75 
76 }
77 
FileEmitContext(const OUString & rOrigFile,const uno::Reference<uno::XComponentContext> & xContext,const pdfparse::PDFContainer * pTop)78 FileEmitContext::FileEmitContext( const OUString&                            rOrigFile,
79                                   const uno::Reference< uno::XComponentContext >& xContext,
80                                   const pdfparse::PDFContainer*                   pTop ) :
81     pdfparse::EmitContext( pTop ),
82     m_aReadHandle(nullptr),
83     m_nReadLen(0)
84 {
85     m_xContextStream.set( io::TempFile::create(xContext), uno::UNO_QUERY_THROW );
86     m_xOut = m_xContextStream->getOutputStream();
87     m_xSeek.set(m_xOut, uno::UNO_QUERY_THROW );
88 
89     if( osl_openFile( rOrigFile.pData,
90                       &m_aReadHandle,
91                       osl_File_OpenFlag_Read ) == osl_File_E_None )
92     {
93         oslFileError aErr = osl_setFilePos( m_aReadHandle, osl_Pos_End, 0 );
94         if( aErr == osl_File_E_None )
95         {
96             sal_uInt64 nFileSize = 0;
97             if( (aErr=osl_getFilePos( m_aReadHandle,
98                                       &nFileSize )) == osl_File_E_None )
99             {
100                 m_nReadLen = static_cast<unsigned int>(nFileSize);
101             }
102         }
103         if( aErr != osl_File_E_None )
104         {
105             osl_closeFile( m_aReadHandle );
106             m_aReadHandle = nullptr;
107         }
108     }
109     m_bDeflate = true;
110 }
111 
~FileEmitContext()112 FileEmitContext::~FileEmitContext()
113 {
114     if( m_aReadHandle )
115         osl_closeFile( m_aReadHandle );
116 }
117 
write(const void * pBuf,unsigned int nLen)118 bool FileEmitContext::write( const void* pBuf, unsigned int nLen )
119 {
120     if( ! m_xOut.is() )
121         return false;
122 
123     uno::Sequence< sal_Int8 > aSeq( nLen );
124     memcpy( aSeq.getArray(), pBuf, nLen );
125     m_xOut->writeBytes( aSeq );
126     return true;
127 }
128 
getCurPos()129 unsigned int FileEmitContext::getCurPos()
130 {
131     unsigned int nPos = 0;
132     if( m_xSeek.is() )
133     {
134         nPos = static_cast<unsigned int>( m_xSeek->getPosition() );
135     }
136     return nPos;
137 }
138 
copyOrigBytes(unsigned int nOrigOffset,unsigned int nLen)139 bool FileEmitContext::copyOrigBytes( unsigned int nOrigOffset, unsigned int nLen )
140 {
141     if( nOrigOffset + nLen > m_nReadLen )
142         return false;
143 
144     if( osl_setFilePos( m_aReadHandle, osl_Pos_Absolut, nOrigOffset ) != osl_File_E_None )
145         return false;
146 
147     uno::Sequence< sal_Int8 > aSeq( nLen );
148 
149     sal_uInt64 nBytesRead = 0;
150     if( osl_readFile( m_aReadHandle,
151                       aSeq.getArray(),
152                       nLen,
153                       &nBytesRead ) != osl_File_E_None
154         || nBytesRead != static_cast<sal_uInt64>(nLen) )
155     {
156         return false;
157     }
158 
159     m_xOut->writeBytes( aSeq );
160     return true;
161 }
162 
readOrigBytes(unsigned int nOrigOffset,unsigned int nLen,void * pBuf)163 unsigned int FileEmitContext::readOrigBytes( unsigned int nOrigOffset, unsigned int nLen, void* pBuf )
164 {
165     if( nOrigOffset + nLen > m_nReadLen )
166         return 0;
167 
168     if( osl_setFilePos( m_aReadHandle,
169                         osl_Pos_Absolut,
170                         nOrigOffset ) != osl_File_E_None )
171     {
172         return 0;
173     }
174 
175     sal_uInt64 nBytesRead = 0;
176     if( osl_readFile( m_aReadHandle,
177                       pBuf,
178                       nLen,
179                       &nBytesRead ) != osl_File_E_None )
180     {
181         return 0;
182     }
183     return static_cast<unsigned int>(nBytesRead);
184 }
185 
186 
PDFDetector(uno::Reference<uno::XComponentContext> xContext)187 PDFDetector::PDFDetector( uno::Reference< uno::XComponentContext > xContext) :
188     m_xContext(std::move( xContext ))
189 {}
190 
191 namespace
192 {
193 
fillAttributes(uno::Sequence<beans::PropertyValue> const & rFilterData,uno::Reference<io::XInputStream> & xInput,OUString & aURL,sal_Int32 & nFilterNamePos,sal_Int32 & nPasswordPos,OUString & aPassword)194 sal_Int32 fillAttributes(uno::Sequence<beans::PropertyValue> const& rFilterData, uno::Reference<io::XInputStream>& xInput, OUString& aURL, sal_Int32& nFilterNamePos, sal_Int32& nPasswordPos, OUString& aPassword)
195 {
196     const beans::PropertyValue* pAttribs = rFilterData.getConstArray();
197     sal_Int32 nAttribs = rFilterData.getLength();
198     for (sal_Int32 i = 0; i < nAttribs; i++)
199     {
200         OUString aVal( u"<no string>"_ustr );
201         pAttribs[i].Value >>= aVal;
202         SAL_INFO("sdext.pdfimport", "doDetection: Attrib: " + pAttribs[i].Name + " = " + aVal);
203 
204         if (pAttribs[i].Name == "InputStream")
205             pAttribs[i].Value >>= xInput;
206         else if (pAttribs[i].Name == "URL")
207             pAttribs[i].Value >>= aURL;
208         else if (pAttribs[i].Name == "FilterName")
209             nFilterNamePos = i;
210         else if (pAttribs[i].Name == "Password")
211         {
212             nPasswordPos = i;
213             pAttribs[i].Value >>= aPassword;
214         }
215     }
216     return nAttribs;
217 }
218 
219 // read the first 1024 byte (see PDF reference implementation note 12)
220 constexpr const sal_Int32 constHeaderSize = 1024;
221 
detectPDF(uno::Reference<io::XInputStream> const & xInput,uno::Sequence<sal_Int8> & aHeader,sal_uInt64 & nHeaderReadSize)222 bool detectPDF(uno::Reference<io::XInputStream> const& xInput, uno::Sequence<sal_Int8>& aHeader, sal_uInt64& nHeaderReadSize)
223 {
224     try
225     {
226         uno::Reference<io::XSeekable> xSeek(xInput, uno::UNO_QUERY);
227         if (xSeek.is())
228             xSeek->seek(0);
229 
230         nHeaderReadSize = xInput->readBytes(aHeader, constHeaderSize);
231         if (nHeaderReadSize <= 5)
232             return false;
233 
234         const sal_Int8* pBytes = aHeader.getConstArray();
235         for (sal_uInt64 i = 0; i < nHeaderReadSize - 5; i++)
236         {
237             if (pBytes[i+0] == '%' &&
238                 pBytes[i+1] == 'P' &&
239                 pBytes[i+2] == 'D' &&
240                 pBytes[i+3] == 'F' &&
241                 pBytes[i+4] == '-')
242             {
243                 return true;
244             }
245         }
246     }
247     catch (const css::io::IOException &)
248     {
249         TOOLS_WARN_EXCEPTION("sdext.pdfimport", "caught");
250     }
251     return false;
252 }
253 
copyToTemp(uno::Reference<io::XInputStream> const & xInput,oslFileHandle & rFileHandle,uno::Sequence<sal_Int8> const & aHeader,sal_uInt64 nHeaderReadSize)254 bool copyToTemp(uno::Reference<io::XInputStream> const& xInput, oslFileHandle& rFileHandle, uno::Sequence<sal_Int8> const& aHeader, sal_uInt64 nHeaderReadSize)
255 {
256     try
257     {
258         sal_uInt64 nWritten = 0;
259         osl_writeFile(rFileHandle, aHeader.getConstArray(), nHeaderReadSize, &nWritten);
260 
261         const sal_uInt64 nBufferSize = 4096;
262         uno::Sequence<sal_Int8> aBuffer(nBufferSize);
263 
264         // copy the bytes
265         sal_uInt64 nRead = 0;
266         do
267         {
268             nRead = xInput->readBytes(aBuffer, nBufferSize);
269             if (nRead > 0)
270             {
271                 osl_writeFile(rFileHandle, aBuffer.getConstArray(), nRead, &nWritten);
272                 if (nWritten != nRead)
273                     return false;
274             }
275         }
276         while (nRead == nBufferSize);
277     }
278     catch (const css::io::IOException &)
279     {
280         TOOLS_WARN_EXCEPTION("sdext.pdfimport", "caught");
281     }
282     return false;
283 }
284 
285 } // end anonymous namespace
286 
287 // XExtendedFilterDetection
detect(uno::Sequence<beans::PropertyValue> & rFilterData)288 OUString SAL_CALL PDFDetector::detect( uno::Sequence< beans::PropertyValue >& rFilterData )
289 {
290     std::unique_lock guard( m_aMutex );
291     bool bSuccess = false;
292 
293     // get the InputStream carrying the PDF content
294     uno::Reference<io::XInputStream> xInput;
295     uno::Reference<io::XStream> xEmbedStream;
296     OUString aOutFilterName;
297     OUString aOutTypeName;
298     OUString aURL;
299     OUString aPassword;
300 
301     sal_Int32 nFilterNamePos = -1;
302     sal_Int32 nPasswordPos = -1;
303     sal_Int32 nAttribs = fillAttributes(rFilterData, xInput, aURL, nFilterNamePos, nPasswordPos, aPassword);
304 
305     if (!xInput.is())
306         return OUString();
307 
308 
309     uno::Sequence<sal_Int8> aHeader(constHeaderSize);
310     sal_uInt64 nHeaderReadSize = 0;
311     bSuccess = detectPDF(xInput, aHeader, nHeaderReadSize);
312 
313     if (!bSuccess)
314         return OUString();
315 
316     oslFileHandle aFileHandle = nullptr;
317 
318     // check for hybrid PDF
319     if (bSuccess && (aURL.isEmpty() || !comphelper::isFileUrl(aURL)))
320     {
321         if (osl_createTempFile(nullptr, &aFileHandle, &aURL.pData) != osl_File_E_None)
322         {
323             bSuccess = false;
324         }
325         else
326         {
327             SAL_INFO( "sdext.pdfimport", "created temp file " + aURL);
328             bSuccess = copyToTemp(xInput, aFileHandle, aHeader, nHeaderReadSize);
329         }
330         osl_closeFile(aFileHandle);
331     }
332 
333     if (!bSuccess)
334     {
335         if (aFileHandle)
336             osl_removeFile(aURL.pData);
337         return OUString();
338     }
339 
340     OUString aEmbedMimetype;
341     xEmbedStream = getAdditionalStream(aURL, aEmbedMimetype, aPassword, m_xContext, rFilterData, false);
342 
343     if (aFileHandle)
344         osl_removeFile(aURL.pData);
345 
346     if (!aEmbedMimetype.isEmpty())
347     {
348         if( aEmbedMimetype == "application/vnd.oasis.opendocument.text"
349             || aEmbedMimetype == "application/vnd.oasis.opendocument.text-master" )
350             aOutFilterName = "writer_pdf_addstream_import";
351         else if ( aEmbedMimetype == "application/vnd.oasis.opendocument.presentation" )
352             aOutFilterName = "impress_pdf_addstream_import";
353         else if( aEmbedMimetype == "application/vnd.oasis.opendocument.graphics"
354                  || aEmbedMimetype == "application/vnd.oasis.opendocument.drawing" )
355             aOutFilterName = "draw_pdf_addstream_import";
356         else if ( aEmbedMimetype == "application/vnd.oasis.opendocument.spreadsheet" )
357             aOutFilterName = "calc_pdf_addstream_import";
358     }
359 
360     if (!aOutFilterName.isEmpty())
361     {
362         if( nFilterNamePos == -1 )
363         {
364             nFilterNamePos = nAttribs;
365             rFilterData.realloc( ++nAttribs );
366             rFilterData.getArray()[ nFilterNamePos ].Name = "FilterName";
367         }
368         auto pFilterData = rFilterData.getArray();
369         aOutTypeName = "pdf_Portable_Document_Format";
370 
371         pFilterData[nFilterNamePos].Value <<= aOutFilterName;
372         if( xEmbedStream.is() )
373         {
374             rFilterData.realloc( ++nAttribs );
375             pFilterData = rFilterData.getArray();
376             pFilterData[nAttribs-1].Name = "EmbeddedSubstream";
377             pFilterData[nAttribs-1].Value <<= xEmbedStream;
378         }
379         if (!aPassword.isEmpty())
380         {
381             if (nPasswordPos == -1)
382             {
383                 nPasswordPos = nAttribs;
384                 rFilterData.realloc(++nAttribs);
385                 pFilterData = rFilterData.getArray();
386                 pFilterData[nPasswordPos].Name = "Password";
387             }
388             pFilterData[nPasswordPos].Value <<= aPassword;
389         }
390     }
391     else
392     {
393         css::beans::PropertyValue* pFilterData;
394         if( nFilterNamePos == -1 )
395         {
396             nFilterNamePos = nAttribs;
397             rFilterData.realloc( ++nAttribs );
398             pFilterData = rFilterData.getArray();
399             pFilterData[ nFilterNamePos ].Name = "FilterName";
400         }
401         else
402             pFilterData = rFilterData.getArray();
403 
404         const sal_Int32 nDocumentType = 0; //const sal_Int32 nDocumentType = queryDocumentTypeDialog(m_xContext,aURL);
405         if( nDocumentType < 0 )
406         {
407             return OUString();
408         }
409         else
410         {
411             switch (nDocumentType)
412             {
413                 case 0:
414                     pFilterData[nFilterNamePos].Value <<= u"draw_pdf_import"_ustr;
415                     break;
416 
417                 case 1:
418                     pFilterData[nFilterNamePos].Value <<= u"impress_pdf_import"_ustr;
419                     break;
420 
421                 case 2:
422                     pFilterData[nFilterNamePos].Value <<= u"writer_pdf_import"_ustr;
423                     break;
424 
425                 default:
426                     assert(!"Unexpected case");
427             }
428         }
429 
430         aOutTypeName = "pdf_Portable_Document_Format";
431     }
432 
433     return aOutTypeName;
434 }
435 
getImplementationName()436 OUString PDFDetector::getImplementationName()
437 {
438     return u"org.libreoffice.comp.documents.PDFDetector"_ustr;
439 }
440 
supportsService(OUString const & ServiceName)441 sal_Bool PDFDetector::supportsService(OUString const & ServiceName)
442 {
443     return cppu::supportsService(this, ServiceName);
444 }
445 
getSupportedServiceNames()446 css::uno::Sequence<OUString> PDFDetector::getSupportedServiceNames()
447 {
448     return {u"com.sun.star.document.ImportFilter"_ustr};
449 }
450 
checkDocChecksum(const OUString & rInPDFFileURL,sal_uInt32 nBytes,const OUString & rChkSum)451 bool checkDocChecksum( const OUString& rInPDFFileURL,
452                        sal_uInt32           nBytes,
453                        const OUString& rChkSum )
454 {
455     if( rChkSum.getLength() != 2* RTL_DIGEST_LENGTH_MD5 )
456     {
457         SAL_INFO(
458             "sdext.pdfimport",
459             "checksum of length " << rChkSum.getLength() << ", expected "
460                 << 2*RTL_DIGEST_LENGTH_MD5);
461         return false;
462     }
463 
464     // prepare checksum to test
465     sal_uInt8 nTestChecksum[ RTL_DIGEST_LENGTH_MD5 ];
466     const sal_Unicode* pChar = rChkSum.getStr();
467     for(sal_uInt8 & rn : nTestChecksum)
468     {
469         sal_uInt8 nByte = sal_uInt8( ( (*pChar >= '0' && *pChar <= '9') ? *pChar - '0' :
470                           ( (*pChar >= 'A' && *pChar <= 'F') ? *pChar - 'A' + 10 :
471                           ( (*pChar >= 'a' && *pChar <= 'f') ? *pChar - 'a' + 10 :
472                           0 ) ) ) );
473         nByte <<= 4;
474         pChar++;
475         nByte |= ( (*pChar >= '0' && *pChar <= '9') ? *pChar - '0' :
476                  ( (*pChar >= 'A' && *pChar <= 'F') ? *pChar - 'A' + 10 :
477                  ( (*pChar >= 'a' && *pChar <= 'f') ? *pChar - 'a' + 10 :
478                  0 ) ) );
479         pChar++;
480         rn = nByte;
481     }
482 
483     // open file and calculate actual checksum up to index nBytes
484     ::std::vector<unsigned char> nChecksum;
485     ::comphelper::Hash aDigest(::comphelper::HashType::MD5);
486     oslFileHandle aRead = nullptr;
487     if( osl_openFile(rInPDFFileURL.pData,
488                      &aRead,
489                      osl_File_OpenFlag_Read ) == osl_File_E_None )
490     {
491         sal_uInt8 aBuf[4096];
492         sal_uInt32 nCur = 0;
493         sal_uInt64 nBytesRead = 0;
494         while( nCur < nBytes )
495         {
496             sal_uInt32 nPass = std::min<sal_uInt32>(nBytes - nCur, sizeof( aBuf ));
497             if( osl_readFile( aRead, aBuf, nPass, &nBytesRead) != osl_File_E_None
498                 || nBytesRead == 0 )
499             {
500                 break;
501             }
502             nPass = static_cast<sal_uInt32>(nBytesRead);
503             nCur += nPass;
504             aDigest.update(aBuf, nPass);
505         }
506 
507         nChecksum = aDigest.finalize();
508         osl_closeFile( aRead );
509     }
510 
511     // compare the contents
512     return nChecksum.size() == RTL_DIGEST_LENGTH_MD5
513         && (0 == memcmp(nChecksum.data(), nTestChecksum, nChecksum.size()));
514 }
515 
516 /* https://github.com/CollaboraOnline/online/issues/7307
517 
518    Light-weight detection to determine if this is a hybrid
519    pdf document worth parsing to get its AdditionalStream
520    and mimetype.
521 
522    TODO: a) do we really ignore the contents of the AdditionalStream
523    and re-parse to get it in the final importer?
524          b) in which case we could presumably parse the mimetype in
525    AdditionalStream here and drop the extraction of the stream.
526 */
detectHasAdditionalStreams(const OUString & rSysUPath)527 static bool detectHasAdditionalStreams(const OUString& rSysUPath)
528 {
529     SvFileStream aHybridDetect(rSysUPath, StreamMode::READ);
530     std::vector<OString> aTrailingLines;
531     const sal_uInt64 nLen = aHybridDetect.remainingSize();
532     aHybridDetect.Seek(nLen - std::min<sal_uInt64>(nLen, 4096));
533     OString aLine;
534     while (aHybridDetect.ReadLine(aLine))
535         aTrailingLines.push_back(aLine);
536     bool bAdditionalStreams(false);
537     for (auto it = aTrailingLines.rbegin(); it != aTrailingLines.rend(); ++it)
538     {
539         if (*it == "trailer")
540             break;
541         if (it->startsWith("/AdditionalStreams "))
542         {
543             bAdditionalStreams = true;
544             break;
545         }
546     }
547     return bAdditionalStreams;
548 }
549 
getAdditionalStream(const OUString & rInPDFFileURL,OUString & rOutMimetype,OUString & io_rPwd,const uno::Reference<uno::XComponentContext> & xContext,const uno::Sequence<beans::PropertyValue> & rFilterData,bool bMayUseUI)550 uno::Reference< io::XStream > getAdditionalStream( const OUString&                          rInPDFFileURL,
551                                                    OUString&                                rOutMimetype,
552                                                    OUString&                                io_rPwd,
553                                                    const uno::Reference<uno::XComponentContext>& xContext,
554                                                    const uno::Sequence<beans::PropertyValue>&    rFilterData,
555                                                    bool                                          bMayUseUI )
556 {
557     uno::Reference< io::XStream > xEmbed;
558     OUString aSysUPath;
559     if( osl_getSystemPathFromFileURL( rInPDFFileURL.pData, &aSysUPath.pData ) != osl_File_E_None )
560         return xEmbed;
561 
562     if (!detectHasAdditionalStreams(aSysUPath))
563         return xEmbed;
564 
565     std::unique_ptr<pdfparse::PDFEntry> pEntry(pdfparse::PDFReader::read(aSysUPath));
566     if( pEntry )
567     {
568         pdfparse::PDFFile* pPDFFile = dynamic_cast<pdfparse::PDFFile*>(pEntry.get());
569         if( pPDFFile )
570         {
571             unsigned int nElements = pPDFFile->m_aSubElements.size();
572             while( nElements-- > 0 )
573             {
574                 pdfparse::PDFTrailer* pTrailer = dynamic_cast<pdfparse::PDFTrailer*>(pPDFFile->m_aSubElements[nElements].get());
575                 if( pTrailer && pTrailer->m_pDict )
576                 {
577                     // search document checksum entry
578                     auto chk = pTrailer->m_pDict->m_aMap.find( "DocChecksum"_ostr );
579                     if( chk == pTrailer->m_pDict->m_aMap.end() )
580                     {
581                         SAL_INFO( "sdext.pdfimport", "no DocChecksum entry" );
582                         continue;
583                     }
584                     pdfparse::PDFName* pChkSumName = dynamic_cast<pdfparse::PDFName*>(chk->second);
585                     if( pChkSumName == nullptr )
586                     {
587                         SAL_INFO( "sdext.pdfimport", "no name for DocChecksum entry" );
588                         continue;
589                     }
590 
591                     // search for AdditionalStreams entry
592                     auto add_stream = pTrailer->m_pDict->m_aMap.find( "AdditionalStreams"_ostr );
593                     if( add_stream == pTrailer->m_pDict->m_aMap.end() )
594                     {
595                         SAL_INFO( "sdext.pdfimport", "no AdditionalStreams entry" );
596                         continue;
597                     }
598                     pdfparse::PDFArray* pStreams = dynamic_cast<pdfparse::PDFArray*>(add_stream->second);
599                     if( ! pStreams || pStreams->m_aSubElements.size() < 2 )
600                     {
601                         SAL_INFO( "sdext.pdfimport", "AdditionalStreams array too small" );
602                         continue;
603                     }
604 
605                     // check checksum
606                     OUString aChkSum = pChkSumName->getFilteredName();
607                     if( ! checkDocChecksum( rInPDFFileURL, pTrailer->m_nOffset, aChkSum ) )
608                         continue;
609 
610                     // extract addstream and mimetype
611                     pdfparse::PDFName* pMimeType = dynamic_cast<pdfparse::PDFName*>(pStreams->m_aSubElements[0].get());
612                     pdfparse::PDFObjectRef* pStreamRef = dynamic_cast<pdfparse::PDFObjectRef*>(pStreams->m_aSubElements[1].get());
613 
614                     SAL_WARN_IF( !pMimeType, "sdext.pdfimport", "error: no mimetype element" );
615                     SAL_WARN_IF( !pStreamRef, "sdext.pdfimport", "error: no stream ref element" );
616 
617                     if( pMimeType && pStreamRef )
618                     {
619                         pdfparse::PDFObject* pObject = pPDFFile->findObject( pStreamRef->m_nNumber, pStreamRef->m_nGeneration );
620                         SAL_WARN_IF( !pObject, "sdext.pdfimport", "object not found" );
621                         if( pObject )
622                         {
623                             if( pPDFFile->isEncrypted() )
624                             {
625                                 bool bAuthenticated = false;
626                                 if( !io_rPwd.isEmpty() )
627                                 {
628                                     OString aIsoPwd = OUStringToOString( io_rPwd,
629                                                                                    RTL_TEXTENCODING_ISO_8859_1 );
630                                     bAuthenticated = pPDFFile->setupDecryptionData( aIsoPwd );
631                                 }
632                                 if( ! bAuthenticated )
633                                 {
634                                     uno::Reference< task::XInteractionHandler > xIntHdl;
635                                     for( const beans::PropertyValue& rAttrib : rFilterData )
636                                     {
637                                         if ( rAttrib.Name == "InteractionHandler" )
638                                             rAttrib.Value >>= xIntHdl;
639                                     }
640                                     if( ! bMayUseUI || ! xIntHdl.is() )
641                                     {
642                                         rOutMimetype = pMimeType->getFilteredName();
643                                         xEmbed.clear();
644                                         break;
645                                     }
646 
647                                     OUString aDocName( rInPDFFileURL.copy( rInPDFFileURL.lastIndexOf( '/' )+1 ) );
648 
649                                     bool bEntered = false;
650                                     do
651                                     {
652                                         bEntered = getPassword( xIntHdl, io_rPwd, ! bEntered, aDocName );
653                                         OString aIsoPwd = OUStringToOString( io_rPwd,
654                                                                                        RTL_TEXTENCODING_ISO_8859_1 );
655                                         bAuthenticated = pPDFFile->setupDecryptionData( aIsoPwd );
656                                     } while( bEntered && ! bAuthenticated );
657                                 }
658 
659                                 if( ! bAuthenticated )
660                                     continue;
661                             }
662                             rOutMimetype = pMimeType->getFilteredName();
663                             FileEmitContext aContext( rInPDFFileURL,
664                                                       xContext,
665                                                       pPDFFile );
666                             aContext.m_bDecrypt = pPDFFile->isEncrypted();
667                             pObject->writeStream( aContext, pPDFFile );
668                             xEmbed = aContext.getContextStream();
669                             break; // success
670                         }
671                     }
672                 }
673             }
674         }
675     }
676 
677     return xEmbed;
678 }
679 
680 
681 extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface*
sdext_PDFDetector_get_implementation(css::uno::XComponentContext * context,css::uno::Sequence<css::uno::Any> const &)682 sdext_PDFDetector_get_implementation(
683     css::uno::XComponentContext* context , css::uno::Sequence<css::uno::Any> const&)
684 {
685     return cppu::acquire(new PDFDetector(context));
686 }
687 
688 }
689 
690 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
691