1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  * This file incorporates work covered by the following license notice:
10  *
11  *   Licensed to the Apache Software Foundation (ASF) under one or more
12  *   contributor license agreements. See the NOTICE file distributed
13  *   with this work for additional information regarding copyright
14  *   ownership. The ASF licenses this file to you under the Apache
15  *   License, Version 2.0 (the "License"); you may not use this file
16  *   except in compliance with the License. You may obtain a copy of
17  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
18  */
19 
20 #include <string.h>
21 
22 #include <comphelper/sequence.hxx>
23 #include <cppuhelper/implbase.hxx>
24 #include <cppuhelper/supportsservice.hxx>
25 
26 #include <rtl/textenc.h>
27 #include <rtl/tencinfo.h>
28 
29 #include <com/sun/star/io/BufferSizeExceededException.hpp>
30 #include <com/sun/star/io/IOException.hpp>
31 #include <com/sun/star/io/NotConnectedException.hpp>
32 #include <com/sun/star/io/XTextInputStream2.hpp>
33 #include <com/sun/star/lang/XServiceInfo.hpp>
34 
35 #include <vector>
36 
37 namespace com::sun::star::uno { class XComponentContext; }
38 
39 using namespace ::osl;
40 using namespace ::cppu;
41 using namespace ::com::sun::star::uno;
42 using namespace ::com::sun::star::lang;
43 using namespace ::com::sun::star::io;
44 
45 
46 // Implementation XTextInputStream
47 
// Number of sal_Unicode slots the decoded-character buffer starts out with
#define INITIAL_UNICODE_BUFFER_CAPACITY     0x100
// Number of bytes requested from the source stream per bulk read; also the
// minimum free space kept in the character buffer before such a read
#define READ_BYTE_COUNT                     0x100
50 
51 namespace {
52 
/// Text input stream implementation: reads bytes from an attached
/// XInputStream, converts them to sal_Unicode characters with an rtl text
/// converter and hands them out line-wise or up to caller-supplied delimiters.
class OTextInputStream : public WeakImplHelper< XTextInputStream2, XServiceInfo >
{
    // Underlying byte stream; assigned by setInputStream()
    Reference< XInputStream > mxStream;

    // Encoding
    // True once setEncoding() has created the converter and context below
    bool mbEncodingInitialized;
    rtl_TextToUnicodeConverter  mConvText2Unicode;
    rtl_TextToUnicodeContext    mContextText2Unicode;
    // Raw bytes of the most recent read from mxStream, input to the converter
    Sequence<sal_Int8>          mSeqSource;

    // Internal buffer for characters that are already converted successfully
    std::vector<sal_Unicode> mvBuffer;
    // Number of valid characters at the front of mvBuffer
    sal_Int32 mnCharsInBuffer;
    // Set when a read from mxStream returned no bytes
    bool mbReachedEOF;

    /// Shared worker behind readLine() and readString().
    /// @throws IOException
    /// @throws RuntimeException
    OUString implReadString( const Sequence< sal_Unicode >& Delimiters,
        bool bRemoveDelimiter, bool bFindLineEnd );
    /// Reads and converts the next chunk of the stream into mvBuffer;
    /// returns the number of characters added.
    /// @throws IOException
    /// @throws RuntimeException
    sal_Int32 implReadNext();

public:
    OTextInputStream();
    virtual ~OTextInputStream() override;

    // Methods XTextInputStream
    virtual OUString SAL_CALL readLine(  ) override;
    virtual OUString SAL_CALL readString( const Sequence< sal_Unicode >& Delimiters, sal_Bool bRemoveDelimiter ) override;
    virtual sal_Bool SAL_CALL isEOF(  ) override;
    virtual void SAL_CALL setEncoding( const OUString& Encoding ) override;

    // Methods XInputStream (forwarded to mxStream)
    virtual sal_Int32 SAL_CALL readBytes( Sequence< sal_Int8 >& aData, sal_Int32 nBytesToRead ) override;
    virtual sal_Int32 SAL_CALL readSomeBytes( Sequence< sal_Int8 >& aData, sal_Int32 nMaxBytesToRead ) override;
    virtual void SAL_CALL skipBytes( sal_Int32 nBytesToSkip ) override;
    virtual sal_Int32 SAL_CALL available(  ) override;
    virtual void SAL_CALL closeInput(  ) override;

    // Methods XActiveDataSink
    virtual void SAL_CALL setInputStream( const Reference< XInputStream >& aStream ) override;
    virtual Reference< XInputStream > SAL_CALL getInputStream() override;

    // Methods XServiceInfo
        virtual OUString              SAL_CALL getImplementationName() override;
        virtual Sequence< OUString >  SAL_CALL getSupportedServiceNames() override;
        virtual sal_Bool              SAL_CALL supportsService(const OUString& ServiceName) override;
};
102 
103 }
104 
105 OTextInputStream::OTextInputStream()
106     : mbEncodingInitialized(false)
107     , mConvText2Unicode(nullptr)
108     , mContextText2Unicode(nullptr)
109     , mSeqSource(READ_BYTE_COUNT)
110     , mvBuffer(INITIAL_UNICODE_BUFFER_CAPACITY, 0)
111     , mnCharsInBuffer(0)
112     , mbReachedEOF(false)
113 {
114 }
115 
116 OTextInputStream::~OTextInputStream()
117 {
118     if( mbEncodingInitialized )
119     {
120         rtl_destroyTextToUnicodeContext( mConvText2Unicode, mContextText2Unicode );
121         rtl_destroyTextToUnicodeConverter( mConvText2Unicode );
122     }
123 }
124 
125 
126 // XTextInputStream
127 
128 OUString OTextInputStream::readLine(  )
129 {
130     static Sequence< sal_Unicode > aDummySeq;
131     return implReadString( aDummySeq, true, true );
132 }
133 
134 OUString OTextInputStream::readString( const Sequence< sal_Unicode >& Delimiters, sal_Bool bRemoveDelimiter )
135 {
136     return implReadString( Delimiters, bRemoveDelimiter, false );
137 }
138 
139 sal_Bool OTextInputStream::isEOF()
140 {
141     bool bRet = false;
142     if( mnCharsInBuffer == 0 && mbReachedEOF )
143         bRet = true;
144     return bRet;
145 }
146 
147 
148 OUString OTextInputStream::implReadString( const Sequence< sal_Unicode >& Delimiters,
149                                            bool bRemoveDelimiter, bool bFindLineEnd )
150 {
151     OUString aRetStr;
152     if( !mbEncodingInitialized )
153     {
154         setEncoding( "utf8" );
155     }
156     if( !mbEncodingInitialized )
157         return aRetStr;
158 
159     // Only for bFindLineEnd
160     sal_Unicode cLineEndChar1 = 0x0D;
161     sal_Unicode cLineEndChar2 = 0x0A;
162 
163     sal_Int32 nBufferReadPos = 0;
164     sal_Int32 nCopyLen = 0;
165     bool bFound = false;
166     bool bFoundFirstLineEndChar = false;
167     sal_Unicode cFirstLineEndChar = 0;
168     while( !bFound )
169     {
170         // Still characters available?
171         if( nBufferReadPos == mnCharsInBuffer )
172         {
173             // Already reached EOF? Then we can't read any more
174             if( mbReachedEOF )
175                 break;
176 
177             // No, so read new characters
178             if( !implReadNext() )
179                 break;
180         }
181 
182         // Now there should be characters available
183         // (otherwise the loop should have been broken before)
184         sal_Unicode c = mvBuffer[ nBufferReadPos++ ];
185 
186         if( bFindLineEnd )
187         {
188             if( bFoundFirstLineEndChar )
189             {
190                 bFound = true;
191                 nCopyLen = nBufferReadPos - 2;
192                 if( c == cLineEndChar1 || c == cLineEndChar2 )
193                 {
194                     // Same line end char -> new line break
195                     if( c == cFirstLineEndChar )
196                     {
197                         nBufferReadPos--;
198                     }
199                 }
200                 else
201                 {
202                     // No second line end char
203                     nBufferReadPos--;
204                 }
205             }
206             else if( c == cLineEndChar1 || c == cLineEndChar2 )
207             {
208                 bFoundFirstLineEndChar = true;
209                 cFirstLineEndChar = c;
210             }
211         }
212         else if( comphelper::findValue(Delimiters, c) != -1 )
213         {
214             bFound = true;
215             nCopyLen = nBufferReadPos;
216             if( bRemoveDelimiter )
217                 nCopyLen--;
218         }
219     }
220 
221     // Nothing found? Return all
222     if( !nCopyLen && !bFound && mbReachedEOF )
223         nCopyLen = nBufferReadPos;
224 
225     // Create string
226     if( nCopyLen )
227         aRetStr = OUString( mvBuffer.data(), nCopyLen );
228 
229     // Copy rest of buffer
230     memmove( mvBuffer.data(), mvBuffer.data() + nBufferReadPos,
231         (mnCharsInBuffer - nBufferReadPos) * sizeof( sal_Unicode ) );
232     mnCharsInBuffer -= nBufferReadPos;
233 
234     return aRetStr;
235 }
236 
237 
238 sal_Int32 OTextInputStream::implReadNext()
239 {
240     sal_Int32 nFreeBufferSize = mvBuffer.size() - mnCharsInBuffer;
241     if( nFreeBufferSize < READ_BYTE_COUNT )
242         mvBuffer.resize(mvBuffer.size() * 2);
243     nFreeBufferSize = mvBuffer.size() - mnCharsInBuffer;
244 
245     try
246     {
247         sal_Int32 nRead = mxStream->readSomeBytes( mSeqSource, READ_BYTE_COUNT );
248         sal_Int32 nTotalRead = nRead;
249         if( nRead == 0 )
250             mbReachedEOF = true;
251 
252         // Try to convert
253         sal_uInt32 uiInfo;
254         sal_Size nSrcCvtBytes = 0;
255         sal_Size nTargetCount = 0;
256         sal_Size nSourceCount = 0;
257         while( true )
258         {
259             const sal_Int8 *pbSource = mSeqSource.getConstArray();
260 
261             // All invalid characters are transformed to the unicode undefined char
262             nTargetCount += rtl_convertTextToUnicode(
263                                 mConvText2Unicode,
264                                 mContextText2Unicode,
265                                 reinterpret_cast<const char*>(&( pbSource[nSourceCount] )),
266                                 nTotalRead - nSourceCount,
267                                 mvBuffer.data() + mnCharsInBuffer + nTargetCount,
268                                 nFreeBufferSize - nTargetCount,
269                                 RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_DEFAULT   |
270                                 RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_DEFAULT |
271                                 RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT,
272                                 &uiInfo,
273                                 &nSrcCvtBytes );
274             nSourceCount += nSrcCvtBytes;
275 
276             bool bCont = false;
277             if( uiInfo & RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL )
278             {
279                 mvBuffer.resize(mvBuffer.size() * 2);
280                 bCont = true;
281             }
282 
283             if( uiInfo & RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOOSMALL )
284             {
285                 // read next byte
286                 static Sequence< sal_Int8 > aOneByteSeq( 1 );
287                 nRead = mxStream->readSomeBytes( aOneByteSeq, 1 );
288                 if( nRead == 0 )
289                 {
290                     mbReachedEOF = true;
291                     break;
292                 }
293 
294                 sal_Int32 nOldLen = mSeqSource.getLength();
295                 nTotalRead++;
296                 if( nTotalRead > nOldLen )
297                 {
298                     mSeqSource.realloc( nTotalRead );
299                 }
300                 mSeqSource.getArray()[ nOldLen ] = aOneByteSeq.getConstArray()[ 0 ];
301                 bCont = true;
302             }
303 
304             if( bCont )
305                 continue;
306             break;
307         }
308 
309         mnCharsInBuffer += nTargetCount;
310         return nTargetCount;
311     }
312     catch( NotConnectedException& )
313     {
314         throw IOException("Not connected");
315         //throw IOException( L"OTextInputStream::implReadString failed" );
316     }
317     catch( BufferSizeExceededException& )
318     {
319         throw IOException("Buffer size exceeded");
320     }
321 }
322 
323 void OTextInputStream::setEncoding( const OUString& Encoding )
324 {
325     OString aOEncodingStr = OUStringToOString( Encoding, RTL_TEXTENCODING_ASCII_US );
326     rtl_TextEncoding encoding = rtl_getTextEncodingFromMimeCharset( aOEncodingStr.getStr() );
327     if( RTL_TEXTENCODING_DONTKNOW == encoding )
328         return;
329 
330     mbEncodingInitialized = true;
331     mConvText2Unicode = rtl_createTextToUnicodeConverter( encoding );
332     mContextText2Unicode = rtl_createTextToUnicodeContext( mConvText2Unicode );
333 }
334 
335 
336 // XInputStream
337 
338 sal_Int32 OTextInputStream::readBytes( Sequence< sal_Int8 >& aData, sal_Int32 nBytesToRead )
339 {
340     return mxStream->readBytes( aData, nBytesToRead );
341 }
342 
343 sal_Int32 OTextInputStream::readSomeBytes( Sequence< sal_Int8 >& aData, sal_Int32 nMaxBytesToRead )
344 {
345     return mxStream->readSomeBytes( aData, nMaxBytesToRead );
346 }
347 
348 void OTextInputStream::skipBytes( sal_Int32 nBytesToSkip )
349 {
350     mxStream->skipBytes( nBytesToSkip );
351 }
352 
353 sal_Int32 OTextInputStream::available(  )
354 {
355     return mxStream->available();
356 }
357 
358 void OTextInputStream::closeInput(  )
359 {
360     mxStream->closeInput();
361 }
362 
363 
364 // XActiveDataSink
365 
366 void OTextInputStream::setInputStream( const Reference< XInputStream >& aStream )
367 {
368     mxStream = aStream;
369 }
370 
371 Reference< XInputStream > OTextInputStream::getInputStream()
372 {
373     return mxStream;
374 }
375 
376 OUString OTextInputStream::getImplementationName()
377 {
378     return "com.sun.star.comp.io.TextInputStream";
379 }
380 
381 sal_Bool OTextInputStream::supportsService(const OUString& ServiceName)
382 {
383     return cppu::supportsService(this, ServiceName);
384 }
385 
386 Sequence< OUString > OTextInputStream::getSupportedServiceNames()
387 {
388     return { "com.sun.star.io.TextInputStream" };
389 }
390 
391 extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface*
392 io_OTextInputStream_get_implementation(
393     css::uno::XComponentContext* , css::uno::Sequence<css::uno::Any> const&)
394 {
395     return cppu::acquire(new OTextInputStream());
396 }
397 
398 
399 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
400