1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ 2 /* 3 * This file is part of the LibreOffice project. 4 * 5 * This Source Code Form is subject to the terms of the Mozilla Public 6 * License, v. 2.0. If a copy of the MPL was not distributed with this 7 * file, You can obtain one at http://mozilla.org/MPL/2.0/. 8 * 9 * This file incorporates work covered by the following license notice: 10 * 11 * Licensed to the Apache Software Foundation (ASF) under one or more 12 * contributor license agreements. See the NOTICE file distributed 13 * with this work for additional information regarding copyright 14 * ownership. The ASF licenses this file to you under the Apache 15 * License, Version 2.0 (the "License"); you may not use this file 16 * except in compliance with the License. You may obtain a copy of 17 * the License at http://www.apache.org/licenses/LICENSE-2.0 . 18 */ 19 20 #include <string.h> 21 22 #include <comphelper/sequence.hxx> 23 #include <cppuhelper/implbase.hxx> 24 #include <cppuhelper/supportsservice.hxx> 25 26 #include <rtl/textenc.h> 27 #include <rtl/tencinfo.h> 28 29 #include <com/sun/star/io/BufferSizeExceededException.hpp> 30 #include <com/sun/star/io/IOException.hpp> 31 #include <com/sun/star/io/NotConnectedException.hpp> 32 #include <com/sun/star/io/XTextInputStream2.hpp> 33 #include <com/sun/star/lang/XServiceInfo.hpp> 34 35 #include <vector> 36 37 namespace com::sun::star::uno { class XComponentContext; } 38 39 using namespace ::osl; 40 using namespace ::cppu; 41 using namespace ::com::sun::star::uno; 42 using namespace ::com::sun::star::lang; 43 using namespace ::com::sun::star::io; 44 45 46 // Implementation XTextInputStream 47 48 #define INITIAL_UNICODE_BUFFER_CAPACITY 0x100 49 #define READ_BYTE_COUNT 0x100 50 51 namespace { 52 53 class OTextInputStream : public WeakImplHelper< XTextInputStream2, XServiceInfo > 54 { 55 Reference< XInputStream > mxStream; 56 57 // Encoding 58 bool mbEncodingInitialized; 59 rtl_TextToUnicodeConverter mConvText2Unicode; 60 rtl_TextToUnicodeContext mContextText2Unicode; 61 Sequence<sal_Int8> mSeqSource; 62 63 // Internal buffer for characters that are already converted successfully 64 std::vector<sal_Unicode> mvBuffer; 65 sal_Int32 mnCharsInBuffer; 66 bool mbReachedEOF; 67 68 /// @throws IOException 69 /// @throws RuntimeException 70 OUString implReadString( const Sequence< sal_Unicode >& Delimiters, 71 bool bRemoveDelimiter, bool bFindLineEnd ); 72 /// @throws IOException 73 /// @throws RuntimeException 74 sal_Int32 implReadNext(); 75 76 public: 77 OTextInputStream(); 78 virtual ~OTextInputStream() override; 79 80 // Methods XTextInputStream 81 virtual OUString SAL_CALL readLine( ) override; 82 virtual OUString SAL_CALL readString( const Sequence< sal_Unicode >& Delimiters, sal_Bool bRemoveDelimiter ) override; 83 virtual sal_Bool SAL_CALL isEOF( ) override; 84 virtual void SAL_CALL setEncoding( const OUString& Encoding ) override; 85 86 // Methods XInputStream 87 virtual sal_Int32 SAL_CALL readBytes( Sequence< sal_Int8 >& aData, sal_Int32 nBytesToRead ) override; 88 virtual sal_Int32 SAL_CALL readSomeBytes( Sequence< sal_Int8 >& aData, sal_Int32 nMaxBytesToRead ) override; 89 virtual void SAL_CALL skipBytes( sal_Int32 nBytesToSkip ) override; 90 virtual sal_Int32 SAL_CALL available( ) override; 91 virtual void SAL_CALL closeInput( ) override; 92 93 // Methods XActiveDataSink 94 virtual void SAL_CALL setInputStream( const Reference< XInputStream >& aStream ) override; 95 virtual Reference< XInputStream > SAL_CALL getInputStream() override; 96 97 // Methods XServiceInfo 98 virtual OUString SAL_CALL getImplementationName() override; 99 virtual Sequence< OUString > SAL_CALL getSupportedServiceNames() override; 100 virtual sal_Bool SAL_CALL supportsService(const OUString& ServiceName) override; 101 }; 102 103 } 104 105 OTextInputStream::OTextInputStream() 106 : mbEncodingInitialized(false) 107 , mConvText2Unicode(nullptr) 108 , mContextText2Unicode(nullptr) 109 , mSeqSource(READ_BYTE_COUNT) 110 , mvBuffer(INITIAL_UNICODE_BUFFER_CAPACITY, 0) 111 , mnCharsInBuffer(0) 112 , mbReachedEOF(false) 113 { 114 } 115 116 OTextInputStream::~OTextInputStream() 117 { 118 if( mbEncodingInitialized ) 119 { 120 rtl_destroyTextToUnicodeContext( mConvText2Unicode, mContextText2Unicode ); 121 rtl_destroyTextToUnicodeConverter( mConvText2Unicode ); 122 } 123 } 124 125 126 // XTextInputStream 127 128 OUString OTextInputStream::readLine( ) 129 { 130 static Sequence< sal_Unicode > aDummySeq; 131 return implReadString( aDummySeq, true, true ); 132 } 133 134 OUString OTextInputStream::readString( const Sequence< sal_Unicode >& Delimiters, sal_Bool bRemoveDelimiter ) 135 { 136 return implReadString( Delimiters, bRemoveDelimiter, false ); 137 } 138 139 sal_Bool OTextInputStream::isEOF() 140 { 141 bool bRet = false; 142 if( mnCharsInBuffer == 0 && mbReachedEOF ) 143 bRet = true; 144 return bRet; 145 } 146 147 148 OUString OTextInputStream::implReadString( const Sequence< sal_Unicode >& Delimiters, 149 bool bRemoveDelimiter, bool bFindLineEnd ) 150 { 151 OUString aRetStr; 152 if( !mbEncodingInitialized ) 153 { 154 setEncoding( "utf8" ); 155 } 156 if( !mbEncodingInitialized ) 157 return aRetStr; 158 159 // Only for bFindLineEnd 160 sal_Unicode cLineEndChar1 = 0x0D; 161 sal_Unicode cLineEndChar2 = 0x0A; 162 163 sal_Int32 nBufferReadPos = 0; 164 sal_Int32 nCopyLen = 0; 165 bool bFound = false; 166 bool bFoundFirstLineEndChar = false; 167 sal_Unicode cFirstLineEndChar = 0; 168 while( !bFound ) 169 { 170 // Still characters available? 171 if( nBufferReadPos == mnCharsInBuffer ) 172 { 173 // Already reached EOF? Then we can't read any more 174 if( mbReachedEOF ) 175 break; 176 177 // No, so read new characters 178 if( !implReadNext() ) 179 break; 180 } 181 182 // Now there should be characters available 183 // (otherwise the loop should have been broken before) 184 sal_Unicode c = mvBuffer[ nBufferReadPos++ ]; 185 186 if( bFindLineEnd ) 187 { 188 if( bFoundFirstLineEndChar ) 189 { 190 bFound = true; 191 nCopyLen = nBufferReadPos - 2; 192 if( c == cLineEndChar1 || c == cLineEndChar2 ) 193 { 194 // Same line end char -> new line break 195 if( c == cFirstLineEndChar ) 196 { 197 nBufferReadPos--; 198 } 199 } 200 else 201 { 202 // No second line end char 203 nBufferReadPos--; 204 } 205 } 206 else if( c == cLineEndChar1 || c == cLineEndChar2 ) 207 { 208 bFoundFirstLineEndChar = true; 209 cFirstLineEndChar = c; 210 } 211 } 212 else if( comphelper::findValue(Delimiters, c) != -1 ) 213 { 214 bFound = true; 215 nCopyLen = nBufferReadPos; 216 if( bRemoveDelimiter ) 217 nCopyLen--; 218 } 219 } 220 221 // Nothing found? Return all 222 if( !nCopyLen && !bFound && mbReachedEOF ) 223 nCopyLen = nBufferReadPos; 224 225 // Create string 226 if( nCopyLen ) 227 aRetStr = OUString( mvBuffer.data(), nCopyLen ); 228 229 // Copy rest of buffer 230 memmove( mvBuffer.data(), mvBuffer.data() + nBufferReadPos, 231 (mnCharsInBuffer - nBufferReadPos) * sizeof( sal_Unicode ) ); 232 mnCharsInBuffer -= nBufferReadPos; 233 234 return aRetStr; 235 } 236 237 238 sal_Int32 OTextInputStream::implReadNext() 239 { 240 sal_Int32 nFreeBufferSize = mvBuffer.size() - mnCharsInBuffer; 241 if( nFreeBufferSize < READ_BYTE_COUNT ) 242 mvBuffer.resize(mvBuffer.size() * 2); 243 nFreeBufferSize = mvBuffer.size() - mnCharsInBuffer; 244 245 try 246 { 247 sal_Int32 nRead = mxStream->readSomeBytes( mSeqSource, READ_BYTE_COUNT ); 248 sal_Int32 nTotalRead = nRead; 249 if( nRead == 0 ) 250 mbReachedEOF = true; 251 252 // Try to convert 253 sal_uInt32 uiInfo; 254 sal_Size nSrcCvtBytes = 0; 255 sal_Size nTargetCount = 0; 256 sal_Size nSourceCount = 0; 257 while( true ) 258 { 259 const sal_Int8 *pbSource = mSeqSource.getConstArray(); 260 261 // All invalid characters are transformed to the unicode undefined char 262 nTargetCount += rtl_convertTextToUnicode( 263 mConvText2Unicode, 264 mContextText2Unicode, 265 reinterpret_cast<const char*>(&( pbSource[nSourceCount] )), 266 nTotalRead - nSourceCount, 267 mvBuffer.data() + mnCharsInBuffer + nTargetCount, 268 nFreeBufferSize - nTargetCount, 269 RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_DEFAULT | 270 RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_DEFAULT | 271 RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT, 272 &uiInfo, 273 &nSrcCvtBytes ); 274 nSourceCount += nSrcCvtBytes; 275 276 bool bCont = false; 277 if( uiInfo & RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL ) 278 { 279 mvBuffer.resize(mvBuffer.size() * 2); 280 bCont = true; 281 } 282 283 if( uiInfo & RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOOSMALL ) 284 { 285 // read next byte 286 static Sequence< sal_Int8 > aOneByteSeq( 1 ); 287 nRead = mxStream->readSomeBytes( aOneByteSeq, 1 ); 288 if( nRead == 0 ) 289 { 290 mbReachedEOF = true; 291 break; 292 } 293 294 sal_Int32 nOldLen = mSeqSource.getLength(); 295 nTotalRead++; 296 if( nTotalRead > nOldLen ) 297 { 298 mSeqSource.realloc( nTotalRead ); 299 } 300 mSeqSource.getArray()[ nOldLen ] = aOneByteSeq.getConstArray()[ 0 ]; 301 bCont = true; 302 } 303 304 if( bCont ) 305 continue; 306 break; 307 } 308 309 mnCharsInBuffer += nTargetCount; 310 return nTargetCount; 311 } 312 catch( NotConnectedException& ) 313 { 314 throw IOException("Not connected"); 315 //throw IOException( L"OTextInputStream::implReadString failed" ); 316 } 317 catch( BufferSizeExceededException& ) 318 { 319 throw IOException("Buffer size exceeded"); 320 } 321 } 322 323 void OTextInputStream::setEncoding( const OUString& Encoding ) 324 { 325 OString aOEncodingStr = OUStringToOString( Encoding, RTL_TEXTENCODING_ASCII_US ); 326 rtl_TextEncoding encoding = rtl_getTextEncodingFromMimeCharset( aOEncodingStr.getStr() ); 327 if( RTL_TEXTENCODING_DONTKNOW == encoding ) 328 return; 329 330 mbEncodingInitialized = true; 331 mConvText2Unicode = rtl_createTextToUnicodeConverter( encoding ); 332 mContextText2Unicode = rtl_createTextToUnicodeContext( mConvText2Unicode ); 333 } 334 335 336 // XInputStream 337 338 sal_Int32 OTextInputStream::readBytes( Sequence< sal_Int8 >& aData, sal_Int32 nBytesToRead ) 339 { 340 return mxStream->readBytes( aData, nBytesToRead ); 341 } 342 343 sal_Int32 OTextInputStream::readSomeBytes( Sequence< sal_Int8 >& aData, sal_Int32 nMaxBytesToRead ) 344 { 345 return mxStream->readSomeBytes( aData, nMaxBytesToRead ); 346 } 347 348 void OTextInputStream::skipBytes( sal_Int32 nBytesToSkip ) 349 { 350 mxStream->skipBytes( nBytesToSkip ); 351 } 352 353 sal_Int32 OTextInputStream::available( ) 354 { 355 return mxStream->available(); 356 } 357 358 void OTextInputStream::closeInput( ) 359 { 360 mxStream->closeInput(); 361 } 362 363 364 // XActiveDataSink 365 366 void OTextInputStream::setInputStream( const Reference< XInputStream >& aStream ) 367 { 368 mxStream = aStream; 369 } 370 371 Reference< XInputStream > OTextInputStream::getInputStream() 372 { 373 return mxStream; 374 } 375 376 OUString OTextInputStream::getImplementationName() 377 { 378 return "com.sun.star.comp.io.TextInputStream"; 379 } 380 381 sal_Bool OTextInputStream::supportsService(const OUString& ServiceName) 382 { 383 return cppu::supportsService(this, ServiceName); 384 } 385 386 Sequence< OUString > OTextInputStream::getSupportedServiceNames() 387 { 388 return { "com.sun.star.io.TextInputStream" }; 389 } 390 391 extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface* 392 io_OTextInputStream_get_implementation( 393 css::uno::XComponentContext* , css::uno::Sequence<css::uno::Any> const&) 394 { 395 return cppu::acquire(new OTextInputStream()); 396 } 397 398 399 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */ 400
