xref: /core/linguistic/source/dicimp.cxx (revision 3d236177)
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  * This file incorporates work covered by the following license notice:
10  *
11  *   Licensed to the Apache Software Foundation (ASF) under one or more
12  *   contributor license agreements. See the NOTICE file distributed
13  *   with this work for additional information regarding copyright
14  *   ownership. The ASF licenses this file to you under the Apache
15  *   License, Version 2.0 (the "License"); you may not use this file
16  *   except in compliance with the License. You may obtain a copy of
17  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
18  */
19 
20 
21 #include <cppuhelper/factory.hxx>
22 #include "dicimp.hxx"
23 #include <i18nlangtag/lang.h>
24 #include <i18nlangtag/languagetag.hxx>
25 #include <linguistic/misc.hxx>
26 #include <osl/mutex.hxx>
27 #include <osl/thread.h>
28 #include <sal/log.hxx>
29 #include <tools/debug.hxx>
30 #include <tools/stream.hxx>
31 #include <tools/urlobj.hxx>
32 #include <comphelper/processfactory.hxx>
33 #include <comphelper/string.hxx>
34 #include <comphelper/sequence.hxx>
35 #include <unotools/ucbstreamhelper.hxx>
36 
37 #include <com/sun/star/ucb/SimpleFileAccess.hpp>
38 #include <com/sun/star/linguistic2/DictionaryEventFlags.hpp>
39 #include <com/sun/star/io/TempFile.hpp>
40 #include <com/sun/star/io/XInputStream.hpp>
41 
42 #include <com/sun/star/linguistic2/LinguServiceManager.hpp>
43 #include <com/sun/star/linguistic2/XSpellChecker1.hpp>
44 
45 #include <algorithm>
46 #include <utility>
47 
48 
49 using namespace utl;
50 using namespace osl;
51 using namespace com::sun::star;
52 using namespace com::sun::star::lang;
53 using namespace com::sun::star::uno;
54 using namespace com::sun::star::linguistic2;
55 using namespace linguistic;
56 
57 
// Size of the buffer used by loadEntries() to read one word of a legacy
// (binary) dictionary; words whose stored length is >= BUFSIZE are rejected.
#define BUFSIZE             4096
// Language value found in legacy headers that ReadDicVersion() maps to
// LANGUAGE_NONE.
#define VERS2_NOLANGUAGE    1024

// Size of the buffer ReadDicVersion() uses to sniff the magic header string.
#define MAX_HEADER_LENGTH 16

// XML-header to query SPELLML support
// to handle user words with "Grammar By" model words
constexpr OUStringLiteral SPELLML_SUPPORT = u"<?xml?>";

// User dictionaries can contain optional "title:" tags
// to support custom titles with spaces and other characters.
// (The old mechanism stores the title of the user dictionary
// only in its file name, but special characters are a
// problem for user dictionaries shipped with LibreOffice.)
//
// The following fake file name extension will be
// added to the text of the title: field for correct
// text stripping and dictionary saving.
constexpr OUStringLiteral EXTENSION_FOR_TITLE_TEXT = u".";

// Magic strings compared against the start of a dictionary file by
// ReadDicVersion(). The "WBSWG*" strings identify the legacy binary formats,
// pVerOOo7 identifies the plain text format that saveEntries() writes.
const char* const pVerStr2    = "WBSWG2";
const char* const pVerStr5    = "WBSWG5";
const char* const pVerStr6    = "WBSWG6";
const char* const pVerOOo7    = "OOoUserDict1";

// Dictionary format versions as returned by ReadDicVersion().
const sal_Int16 DIC_VERSION_DONTKNOW = -1;
const sal_Int16 DIC_VERSION_2 = 2;
const sal_Int16 DIC_VERSION_5 = 5;
const sal_Int16 DIC_VERSION_6 = 6;
const sal_Int16 DIC_VERSION_7 = 7;
88 
89 static uno::Reference< XLinguServiceManager2 > GetLngSvcMgr_Impl()
90 {
91     uno::Reference< XComponentContext > xContext( comphelper::getProcessComponentContext() );
92     uno::Reference< XLinguServiceManager2 > xRes = LinguServiceManager::create( xContext ) ;
93     return xRes;
94 }
95 
96 static bool getTag(std::string_view rLine, std::string_view rTagName,
97     OString &rTagValue)
98 {
99     size_t nPos = rLine.find(rTagName);
100     if (nPos == std::string_view::npos)
101         return false;
102 
103     rTagValue = OString(comphelper::string::strip(rLine.substr(nPos + rTagName.size()),
104         ' '));
105     return true;
106 }
107 
108 
109 sal_Int16 ReadDicVersion( SvStream& rStream, LanguageType &nLng, bool &bNeg, OUString &aDicName )
110 {
111     // Sniff the header
112     sal_Int16 nDicVersion = DIC_VERSION_DONTKNOW;
113     char pMagicHeader[MAX_HEADER_LENGTH];
114 
115     nLng = LANGUAGE_NONE;
116     bNeg = false;
117 
118     if (rStream.GetError())
119         return -1;
120 
121     sal_uInt64 const nSniffPos = rStream.Tell();
122     static std::size_t nVerOOo7Len = sal::static_int_cast< std::size_t >(strlen( pVerOOo7 ));
123     pMagicHeader[ nVerOOo7Len ] = '\0';
124     if ((rStream.ReadBytes(static_cast<void *>(pMagicHeader), nVerOOo7Len) == nVerOOo7Len) &&
125         !strcmp(pMagicHeader, pVerOOo7))
126     {
127         bool bSuccess;
128         OStringBuffer aLine;
129 
130         nDicVersion = DIC_VERSION_7;
131 
132         // 1st skip magic / header line
133         rStream.ReadLine(aLine);
134 
135         // 2nd line: language all | en-US | pt-BR ...
136         while ((bSuccess = rStream.ReadLine(aLine)))
137         {
138             OString aTagValue;
139 
140             if (aLine[0] == '#') // skip comments
141                 continue;
142 
143             // lang: field
144             if (getTag(aLine, "lang: ", aTagValue))
145             {
146                 if (aTagValue == "<none>")
147                     nLng = LANGUAGE_NONE;
148                 else
149                     nLng = LanguageTag::convertToLanguageType(
150                             OStringToOUString( aTagValue, RTL_TEXTENCODING_ASCII_US));
151             }
152 
153             // type: negative / positive
154             if (getTag(aLine, "type: ", aTagValue))
155             {
156                 bNeg = aTagValue == "negative";
157             }
158 
159             // lang: title
160             if (getTag(aLine, "title: ", aTagValue))
161             {
162                 aDicName = OStringToOUString( aTagValue, RTL_TEXTENCODING_UTF8) +
163                     // recent title text preparation in GetDicInfoStr() waits for an
164                     // extension, so we add it to avoid bad stripping at final dot
165                     // of the title text
166                     EXTENSION_FOR_TITLE_TEXT;
167             }
168 
169             if (std::string_view(aLine).find("---") != std::string_view::npos) // end of header
170                 break;
171         }
172         if (!bSuccess)
173             return -2;
174     }
175     else
176     {
177         sal_uInt16 nLen;
178 
179         rStream.Seek (nSniffPos );
180 
181         rStream.ReadUInt16( nLen );
182         if (nLen >= MAX_HEADER_LENGTH)
183             return -1;
184 
185         rStream.ReadBytes(pMagicHeader, nLen);
186         pMagicHeader[nLen] = '\0';
187 
188         // Check version magic
189         if (0 == strcmp( pMagicHeader, pVerStr6 ))
190             nDicVersion = DIC_VERSION_6;
191         else if (0 == strcmp( pMagicHeader, pVerStr5 ))
192             nDicVersion = DIC_VERSION_5;
193         else if (0 == strcmp( pMagicHeader, pVerStr2 ))
194             nDicVersion = DIC_VERSION_2;
195         else
196             nDicVersion = DIC_VERSION_DONTKNOW;
197 
198         if (DIC_VERSION_2 == nDicVersion ||
199             DIC_VERSION_5 == nDicVersion ||
200             DIC_VERSION_6 == nDicVersion)
201         {
202             // The language of the dictionary
203             sal_uInt16 nTmp = 0;
204             rStream.ReadUInt16( nTmp );
205             nLng = LanguageType(nTmp);
206             if (VERS2_NOLANGUAGE == static_cast<sal_uInt16>(nLng))
207                 nLng = LANGUAGE_NONE;
208 
209             // Negative Flag
210             rStream.ReadCharAsBool( bNeg );
211         }
212     }
213 
214     return nDicVersion;
215 }
216 
217 DictionaryNeo::DictionaryNeo(OUString aName,
218                              LanguageType nLang, DictionaryType eType,
219                              const OUString &rMainURL,
220                              bool bWriteable) :
221     aDicEvtListeners( GetLinguMutex() ),
222     aDicName        (std::move(aName)),
223     aMainURL        (rMainURL),
224     eDicType        (eType),
225     nLanguage       (nLang)
226 {
227     nDicVersion  = DIC_VERSION_DONTKNOW;
228     bNeedEntries = true;
229     bIsModified  = bIsActive = false;
230     bIsReadonly = !bWriteable;
231 
232     if( !rMainURL.isEmpty())
233     {
234         bool bExists = FileExists( rMainURL );
235         if( !bExists )
236         {
237             // save new dictionaries with in Format 7 (UTF8 plain text)
238             nDicVersion  = DIC_VERSION_7;
239 
240             //! create physical representation of an **empty** dictionary
241             //! that could be found by the dictionary-list implementation
242             // (Note: empty dictionaries are not just empty files!)
243             DBG_ASSERT( !bIsReadonly,
244                     "DictionaryNeo: dictionaries should be writeable if they are to be saved" );
245             if (!bIsReadonly)
246                 saveEntries( rMainURL );
247             bNeedEntries = false;
248         }
249     }
250     else
251     {
252         // non persistent dictionaries (like IgnoreAllList) should always be writable
253         bIsReadonly  = false;
254         bNeedEntries = false;
255     }
256 }
257 
258 DictionaryNeo::~DictionaryNeo()
259 {
260 }
261 
262 ErrCode DictionaryNeo::loadEntries(const OUString &rMainURL)
263 {
264     MutexGuard  aGuard( GetLinguMutex() );
265 
266     // counter check that it is safe to set bIsModified to sal_False at
267     // the end of the function
268     DBG_ASSERT(!bIsModified, "lng : dictionary already modified!");
269 
270     // function should only be called once in order to load entries from file
271     bNeedEntries = false;
272 
273     if (rMainURL.isEmpty())
274         return ERRCODE_NONE;
275 
276     uno::Reference< uno::XComponentContext > xContext( comphelper::getProcessComponentContext() );
277 
278     // get XInputStream stream
279     uno::Reference< io::XInputStream > xStream;
280     try
281     {
282         uno::Reference< ucb::XSimpleFileAccess3 > xAccess( ucb::SimpleFileAccess::create(xContext) );
283         xStream = xAccess->openFileRead( rMainURL );
284     }
285     catch (const uno::Exception &)
286     {
287         SAL_WARN( "linguistic", "failed to get input stream" );
288     }
289     if (!xStream.is())
290         return ErrCode(sal_uInt32(-1));
291 
292     std::unique_ptr<SvStream> pStream( utl::UcbStreamHelper::CreateStream( xStream ) );
293 
294     // read header
295     bool bNegativ;
296     LanguageType nLang;
297     nDicVersion = ReadDicVersion(*pStream, nLang, bNegativ, aDicName);
298     ErrCode nErr = pStream->GetError();
299     if (nErr != ERRCODE_NONE)
300         return nErr;
301 
302     nLanguage = nLang;
303 
304     eDicType = bNegativ ? DictionaryType_NEGATIVE : DictionaryType_POSITIVE;
305 
306     rtl_TextEncoding eEnc = osl_getThreadTextEncoding();
307     if (nDicVersion >= DIC_VERSION_6)
308         eEnc = RTL_TEXTENCODING_UTF8;
309     aEntries.clear();
310 
311     if (DIC_VERSION_6 == nDicVersion ||
312         DIC_VERSION_5 == nDicVersion ||
313         DIC_VERSION_2 == nDicVersion)
314     {
315         sal_uInt16  nLen = 0;
316         char aWordBuf[ BUFSIZE ];
317 
318         // Read the first word
319         if (!pStream->eof())
320         {
321             pStream->ReadUInt16( nLen );
322             if (ERRCODE_NONE != (nErr = pStream->GetError()))
323                 return nErr;
324             if ( nLen < BUFSIZE )
325             {
326                 pStream->ReadBytes(aWordBuf, nLen);
327                 if (ERRCODE_NONE != (nErr = pStream->GetError()))
328                     return nErr;
329                 *(aWordBuf + nLen) = 0;
330             }
331             else
332                 return SVSTREAM_READ_ERROR;
333         }
334 
335         while(!pStream->eof())
336         {
337             // Read from file
338             // Paste in dictionary without converting
339             if(*aWordBuf)
340             {
341                 OUString aText(aWordBuf, rtl_str_getLength(aWordBuf), eEnc);
342                 uno::Reference< XDictionaryEntry > xEntry =
343                         new DicEntry( aText, bNegativ );
344                 addEntry_Impl( xEntry, true ); //! don't launch events here
345             }
346 
347             pStream->ReadUInt16( nLen );
348             if (pStream->eof())
349                 break;
350             if (ERRCODE_NONE != (nErr = pStream->GetError()))
351                 return nErr;
352 
353             if (nLen < BUFSIZE)
354             {
355                 pStream->ReadBytes(aWordBuf, nLen);
356                 if (ERRCODE_NONE != (nErr = pStream->GetError()))
357                     return nErr;
358             }
359             else
360                 return SVSTREAM_READ_ERROR;
361             *(aWordBuf + nLen) = 0;
362         }
363     }
364     else if (DIC_VERSION_7 == nDicVersion)
365     {
366         OStringBuffer aLine;
367 
368         // remaining lines - stock strings (a [==] b)
369         while (pStream->ReadLine(aLine))
370         {
371             if (aLine.isEmpty() || aLine[0] == '#') // skip comments
372                 continue;
373             OUString aText = OStringToOUString(aLine, RTL_TEXTENCODING_UTF8);
374             uno::Reference< XDictionaryEntry > xEntry =
375                     new DicEntry( aText, eDicType == DictionaryType_NEGATIVE );
376             addEntry_Impl( xEntry, true ); //! don't launch events here
377         }
378     }
379 
380     SAL_WARN_IF(!isSorted(), "linguistic", "dictionary is not sorted");
381 
382     // since this routine should be called only initially (prior to any
383     // modification to be saved) we reset the bIsModified flag here that
384     // was implicitly set by addEntry_Impl
385     bIsModified = false;
386 
387     return pStream->GetError();
388 }
389 
390 static OString formatForSave(const uno::Reference< XDictionaryEntry > &xEntry,
391     rtl_TextEncoding eEnc )
392 {
393    OUStringBuffer aStr(xEntry->getDictionaryWord());
394 
395    if (xEntry->isNegative() || !xEntry->getReplacementText().isEmpty())
396    {
397        aStr.append("==" + xEntry->getReplacementText());
398    }
399    return OUStringToOString(aStr, eEnc);
400 }
401 
402 ErrCode DictionaryNeo::saveEntries(const OUString &rURL)
403 {
404     MutexGuard aGuard( GetLinguMutex() );
405 
406     if (rURL.isEmpty())
407         return ERRCODE_NONE;
408     DBG_ASSERT(!INetURLObject( rURL ).HasError(), "lng : invalid URL");
409 
410     uno::Reference< uno::XComponentContext > xContext( comphelper::getProcessComponentContext() );
411 
412     // get XOutputStream stream
413     uno::Reference<io::XStream> xStream;
414     try
415     {
416         xStream = io::TempFile::create(xContext);
417     }
418     catch (const uno::Exception &)
419     {
420         DBG_ASSERT( false, "failed to get input stream" );
421     }
422     if (!xStream.is())
423         return ErrCode(sal_uInt32(-1));
424 
425     std::unique_ptr<SvStream> pStream( utl::UcbStreamHelper::CreateStream( xStream ) );
426 
427     // Always write as the latest version, i.e. DIC_VERSION_7
428 
429     rtl_TextEncoding eEnc = RTL_TEXTENCODING_UTF8;
430     pStream->WriteLine(pVerOOo7);
431     ErrCode nErr = pStream->GetError();
432     if (nErr != ERRCODE_NONE)
433         return nErr;
434     /* XXX: the <none> case could be differentiated, is it absence or
435      * undetermined or multiple? Earlier versions did not know about 'und' and
436      * 'mul' and 'zxx' codes. Sync with ReadDicVersion() */
437     if (LinguIsUnspecified(nLanguage))
438         pStream->WriteLine("lang: <none>");
439     else
440     {
441         OString aLine = "lang: " + OUStringToOString(LanguageTag::convertToBcp47(nLanguage), eEnc);
442         pStream->WriteLine(aLine);
443     }
444     if (ERRCODE_NONE != (nErr = pStream->GetError()))
445         return nErr;
446     if (eDicType == DictionaryType_POSITIVE)
447         pStream->WriteLine("type: positive");
448     else
449         pStream->WriteLine("type: negative");
450     if (aDicName.endsWith(EXTENSION_FOR_TITLE_TEXT))
451     {
452         pStream->WriteLine(Concat2View("title: " + OUStringToOString(
453             // strip EXTENSION_FOR_TITLE_TEXT
454             aDicName.subView(0, aDicName.lastIndexOf(EXTENSION_FOR_TITLE_TEXT)), eEnc)));
455     }
456     if (ERRCODE_NONE != (nErr = pStream->GetError()))
457         return nErr;
458     pStream->WriteLine("---");
459     if (ERRCODE_NONE != (nErr = pStream->GetError()))
460         return nErr;
461     for (const Reference<XDictionaryEntry> & aEntrie : aEntries)
462     {
463         OString aOutStr = formatForSave(aEntrie, eEnc);
464         pStream->WriteLine (aOutStr);
465         if (ERRCODE_NONE != (nErr = pStream->GetError()))
466             return nErr;
467     }
468 
469     try
470     {
471         pStream.reset();
472         uno::Reference< ucb::XSimpleFileAccess3 > xAccess(ucb::SimpleFileAccess::create(xContext));
473         Reference<io::XInputStream> xInputStream(xStream, UNO_QUERY_THROW);
474         uno::Reference<io::XSeekable> xSeek(xInputStream, UNO_QUERY_THROW);
475         xSeek->seek(0);
476         xAccess->writeFile(rURL, xInputStream);
477         //If we are migrating from an older version, then on first successful
478         //write, we're now converted to the latest version, i.e. DIC_VERSION_7
479         nDicVersion = DIC_VERSION_7;
480     }
481     catch (const uno::Exception &)
482     {
483         DBG_ASSERT( false, "failed to write stream" );
484         return ErrCode(sal_uInt32(-1));
485     }
486 
487     return nErr;
488 }
489 
490 void DictionaryNeo::launchEvent(sal_Int16 nEvent,
491                                 const uno::Reference< XDictionaryEntry >& xEntry)
492 {
493     MutexGuard  aGuard( GetLinguMutex() );
494 
495     DictionaryEvent aEvt;
496     aEvt.Source = uno::Reference< XDictionary >( this );
497     aEvt.nEvent = nEvent;
498     aEvt.xDictionaryEntry = xEntry;
499 
500     aDicEvtListeners.notifyEach( &XDictionaryEventListener::processDictionaryEvent, aEvt);
501 }
502 
503 int DictionaryNeo::cmpDicEntry(std::u16string_view rWord1,
504                                std::u16string_view rWord2,
505                                bool bSimilarOnly)
506 {
507     // returns 0 if rWord1 is equal to rWord2
508     //   "     a value < 0 if rWord1 is less than rWord2
509     //   "     a value > 0 if rWord1 is greater than rWord2
510 
511     int nRes = 0;
512 
513     sal_Int32     nLen1 = rWord1.size(),
514                   nLen2 = rWord2.size();
515     if (bSimilarOnly)
516     {
517         const sal_Unicode cChar = '.';
518         if (nLen1  &&  cChar == rWord1[ nLen1 - 1 ])
519             nLen1--;
520         if (nLen2  &&  cChar == rWord2[ nLen2 - 1 ])
521             nLen2--;
522     }
523 
524     const sal_Unicode cIgnChar = '=';
525     const sal_Unicode cIgnBeg = '['; // for alternative hyphenation, eg. Schif[f]fahrt, Zuc[1k]ker
526     const sal_Unicode cIgnEnd = ']'; // planned: gee"[1-/e]rfde or ge[-/1e]e"rfde (gee"rfde -> ge=erfde)
527     sal_Int32       nIdx1 = 0,
528                   nIdx2 = 0,
529                   nNumIgnChar1 = 0,
530                   nNumIgnChar2 = 0;
531 
532     bool IgnState;
533     sal_Int32 nDiff = 0;
534     sal_Unicode cChar1 = '\0';
535     sal_Unicode cChar2 = '\0';
536     do
537     {
538         // skip chars to be ignored
539         IgnState = false;
540         while (nIdx1 < nLen1)
541         {
542             cChar1 = rWord1[ nIdx1 ];
543             if (cChar1 != cIgnChar && cChar1 != cIgnBeg && !IgnState )
544                 break;
545             if ( cChar1 == cIgnBeg )
546                 IgnState = true;
547             else if (cChar1 == cIgnEnd)
548                 IgnState = false;
549             nIdx1++;
550             nNumIgnChar1++;
551         }
552         IgnState = false;
553         while (nIdx2 < nLen2)
554         {
555             cChar2 = rWord2[ nIdx2 ];
556             if (cChar2 != cIgnChar && cChar2 != cIgnBeg && !IgnState )
557                 break;
558             if ( cChar2 == cIgnBeg )
559                 IgnState = true;
560             else if (cChar2 == cIgnEnd)
561                 IgnState = false;
562             nIdx2++;
563             nNumIgnChar2++;
564         }
565 
566         if (nIdx1 < nLen1  &&  nIdx2 < nLen2)
567         {
568             nDiff = cChar1 - cChar2;
569             if (nDiff)
570                 break;
571             nIdx1++;
572             nIdx2++;
573         }
574     } while (nIdx1 < nLen1  &&  nIdx2 < nLen2);
575 
576 
577     if (nDiff)
578         nRes = nDiff;
579     else
580     {   // the string with the smallest count of not ignored chars is the
581         // shorter one
582 
583         // count remaining IgnChars
584         IgnState = false;
585         while (nIdx1 < nLen1 )
586         {
587             if (rWord1[ nIdx1 ] == cIgnBeg)
588                 IgnState = true;
589             if (IgnState || rWord1[ nIdx1 ] == cIgnChar)
590                 nNumIgnChar1++;
591             if (rWord1[ nIdx1] == cIgnEnd)
592                 IgnState = false;
593             nIdx1++;
594         }
595         IgnState = false;
596         while (nIdx2 < nLen2 )
597         {
598             if (rWord2[ nIdx2 ] == cIgnBeg)
599                 IgnState = true;
600             if (IgnState || rWord2[ nIdx2 ] == cIgnChar)
601                 nNumIgnChar2++;
602             if (rWord2[ nIdx2 ] == cIgnEnd)
603                 IgnState = false;
604             nIdx2++;
605         }
606 
607         nRes = (nLen1 - nNumIgnChar1) - (nLen2 - nNumIgnChar2);
608     }
609 
610     return nRes;
611 }
612 
613 bool DictionaryNeo::seekEntry(std::u16string_view rWord,
614                               sal_Int32 *pPos, bool bSimilarOnly)
615 {
616     // look for entry with binary search.
617     // return sal_True if found sal_False else.
618     // if pPos != NULL it will become the position of the found entry, or
619     // if that was not found the position where it has to be inserted
620     // to keep the entries sorted
621 
622     MutexGuard  aGuard( GetLinguMutex() );
623 
624     sal_Int32 nUpperIdx = getCount(),
625           nMidIdx,
626           nLowerIdx = 0;
627     if( nUpperIdx > 0 )
628     {
629         nUpperIdx--;
630         while( nLowerIdx <= nUpperIdx )
631         {
632             nMidIdx = (nLowerIdx + nUpperIdx) / 2;
633             DBG_ASSERT(aEntries[nMidIdx].is(), "lng : empty entry encountered");
634 
635             int nCmp = - cmpDicEntry( aEntries[nMidIdx]->getDictionaryWord(),
636                                       rWord, bSimilarOnly );
637             if(nCmp == 0)
638             {
639                 if( pPos ) *pPos = nMidIdx;
640                 return true;
641             }
642             else if(nCmp > 0)
643                 nLowerIdx = nMidIdx + 1;
644             else if( nMidIdx == 0 )
645             {
646                 if( pPos ) *pPos = nLowerIdx;
647                 return false;
648             }
649             else
650                 nUpperIdx = nMidIdx - 1;
651         }
652     }
653     if( pPos ) *pPos = nLowerIdx;
654     return false;
655 }
656 
657 bool DictionaryNeo::isSorted()
658 {
659     bool bRes = true;
660 
661     sal_Int32 nEntries = getCount();
662     sal_Int32 i;
663     for (i = 1;  i < nEntries;  i++)
664     {
665         if (cmpDicEntry( aEntries[i-1]->getDictionaryWord(),
666                          aEntries[i]->getDictionaryWord() ) > 0)
667         {
668             bRes = false;
669             break;
670         }
671     }
672     return bRes;
673 }
674 
675 bool DictionaryNeo::addEntry_Impl(const uno::Reference< XDictionaryEntry >& xDicEntry,
676         bool bIsLoadEntries)
677 {
678     MutexGuard  aGuard( GetLinguMutex() );
679 
680     bool bRes = false;
681 
682     if ( bIsLoadEntries || (!bIsReadonly  &&  xDicEntry.is()) )
683     {
684         bool bIsNegEntry = xDicEntry->isNegative();
685         bool bAddEntry   = !isFull() &&
686                    (   ( eDicType == DictionaryType_POSITIVE && !bIsNegEntry )
687                     || ( eDicType == DictionaryType_NEGATIVE &&  bIsNegEntry )
688                     || ( eDicType == DictionaryType_MIXED ) );
689 
690         // look for position to insert entry at
691         // if there is already an entry do not insert the new one
692         sal_Int32 nPos = 0;
693         if (bAddEntry)
694         {
695             const bool bFound = seekEntry( xDicEntry->getDictionaryWord(), &nPos );
696             if (bFound)
697                 bAddEntry = false;
698         }
699 
700         if (bAddEntry)
701         {
702             DBG_ASSERT(!bNeedEntries, "lng : entries still not loaded");
703 
704             // insert new entry at specified position
705             aEntries.insert(aEntries.begin() + nPos, xDicEntry);
706             SAL_WARN_IF(!isSorted(), "linguistic", "dictionary entries unsorted");
707 
708             bIsModified = true;
709             bRes = true;
710 
711             if (!bIsLoadEntries)
712                 launchEvent( DictionaryEventFlags::ADD_ENTRY, xDicEntry );
713         }
714     }
715 
716     // add word to the Hunspell dictionary using a sample word for affixation/compounding
717     if (xDicEntry.is() && !xDicEntry->isNegative() && !xDicEntry->getReplacementText().isEmpty()) {
718         uno::Reference< XLinguServiceManager2 > xLngSvcMgr( GetLngSvcMgr_Impl() );
719         uno::Reference< XSpellChecker1 > xSpell;
720         Reference< XSpellAlternatives > xTmpRes;
721         xSpell.set( xLngSvcMgr->getSpellChecker(), UNO_QUERY );
722         Sequence< css::beans::PropertyValue > aEmptySeq;
723         if (xSpell.is() && (xSpell->isValid( SPELLML_SUPPORT, static_cast<sal_uInt16>(nLanguage), aEmptySeq )))
724         {
725             // "Grammar By" sample word is a Hunspell dictionary word?
726             if (xSpell->isValid( xDicEntry->getReplacementText(), static_cast<sal_uInt16>(nLanguage), aEmptySeq ))
727             {
728                 xTmpRes = xSpell->spell( "<?xml?><query type='add'><word>" +
729                     xDicEntry->getDictionaryWord() + "</word><word>" + xDicEntry->getReplacementText() +
730                     "</word></query>", static_cast<sal_uInt16>(nLanguage), aEmptySeq );
731                 bRes = true;
732             } else
733                 bRes = false;
734         }
735     }
736 
737     return bRes;
738 }
739 
740 OUString SAL_CALL DictionaryNeo::getName(  )
741 {
742     MutexGuard  aGuard( GetLinguMutex() );
743     return aDicName;
744 }
745 
746 void SAL_CALL DictionaryNeo::setName( const OUString& aName )
747 {
748     MutexGuard  aGuard( GetLinguMutex() );
749 
750     if (aDicName != aName)
751     {
752         aDicName = aName;
753         launchEvent(DictionaryEventFlags::CHG_NAME, nullptr);
754     }
755 }
756 
757 DictionaryType SAL_CALL DictionaryNeo::getDictionaryType(  )
758 {
759     MutexGuard  aGuard( GetLinguMutex() );
760 
761     return eDicType;
762 }
763 
764 void SAL_CALL DictionaryNeo::setActive( sal_Bool bActivate )
765 {
766     MutexGuard  aGuard( GetLinguMutex() );
767 
768     if (bIsActive == bool(bActivate))
769         return;
770 
771     bIsActive = bActivate;
772     sal_Int16 nEvent = bIsActive ?
773             DictionaryEventFlags::ACTIVATE_DIC : DictionaryEventFlags::DEACTIVATE_DIC;
774 
775     // remove entries from memory if dictionary is deactivated
776     if (!bIsActive)
777     {
778         bool bIsEmpty = aEntries.empty();
779 
780         // save entries first if necessary
781         if (bIsModified && hasLocation() && !isReadonly())
782         {
783             store();
784 
785             aEntries.clear();
786             bNeedEntries = !bIsEmpty;
787         }
788         DBG_ASSERT( !bIsModified || !hasLocation() || isReadonly(),
789                 "lng : dictionary is still modified" );
790     }
791 
792     launchEvent(nEvent, nullptr);
793 }
794 
795 sal_Bool SAL_CALL DictionaryNeo::isActive(  )
796 {
797     MutexGuard  aGuard( GetLinguMutex() );
798     return bIsActive;
799 }
800 
801 sal_Int32 SAL_CALL DictionaryNeo::getCount(  )
802 {
803     MutexGuard  aGuard( GetLinguMutex() );
804 
805     if (bNeedEntries)
806         loadEntries( aMainURL );
807     return static_cast<sal_Int32>(aEntries.size());
808 }
809 
810 Locale SAL_CALL DictionaryNeo::getLocale(  )
811 {
812     MutexGuard  aGuard( GetLinguMutex() );
813     return LanguageTag::convertToLocale( nLanguage );
814 }
815 
816 void SAL_CALL DictionaryNeo::setLocale( const Locale& aLocale )
817 {
818     MutexGuard  aGuard( GetLinguMutex() );
819     LanguageType nLanguageP = LinguLocaleToLanguage( aLocale );
820     if (!bIsReadonly  &&  nLanguage != nLanguageP)
821     {
822         nLanguage = nLanguageP;
823         bIsModified = true; // new language needs to be saved with dictionary
824 
825         launchEvent( DictionaryEventFlags::CHG_LANGUAGE, nullptr );
826     }
827 }
828 
829 uno::Reference< XDictionaryEntry > SAL_CALL DictionaryNeo::getEntry(
830             const OUString& aWord )
831 {
832     MutexGuard  aGuard( GetLinguMutex() );
833 
834     if (bNeedEntries)
835         loadEntries( aMainURL );
836 
837     sal_Int32 nPos;
838     bool bFound = seekEntry( aWord, &nPos, true );
839     DBG_ASSERT(!bFound || nPos < static_cast<sal_Int32>(aEntries.size()), "lng : index out of range");
840 
841     return bFound ? aEntries[ nPos ]
842                     : uno::Reference< XDictionaryEntry >();
843 }
844 
845 sal_Bool SAL_CALL DictionaryNeo::addEntry(
846             const uno::Reference< XDictionaryEntry >& xDicEntry )
847 {
848     MutexGuard  aGuard( GetLinguMutex() );
849 
850     bool bRes = false;
851 
852     if (!bIsReadonly)
853     {
854         if (bNeedEntries)
855             loadEntries( aMainURL );
856         bRes = addEntry_Impl( xDicEntry );
857     }
858 
859     return bRes;
860 }
861 
862 sal_Bool SAL_CALL
863     DictionaryNeo::add( const OUString& rWord, sal_Bool bIsNegative,
864             const OUString& rRplcText )
865 {
866     MutexGuard  aGuard( GetLinguMutex() );
867 
868     bool bRes = false;
869 
870     if (!bIsReadonly)
871     {
872         uno::Reference< XDictionaryEntry > xEntry =
873                 new DicEntry( rWord, bIsNegative, rRplcText );
874         bRes = addEntry_Impl( xEntry );
875     }
876 
877     return bRes;
878 }
879 
880 sal_Bool SAL_CALL DictionaryNeo::remove( const OUString& aWord )
881 {
882     MutexGuard  aGuard( GetLinguMutex() );
883 
884     bool bRemoved = false;
885 
886     if (!bIsReadonly)
887     {
888         if (bNeedEntries)
889             loadEntries( aMainURL );
890 
891         sal_Int32 nPos;
892         bool bFound = seekEntry( aWord, &nPos );
893         DBG_ASSERT(!bFound || nPos < static_cast<sal_Int32>(aEntries.size()), "lng : index out of range");
894 
895         // remove element if found
896         if (bFound)
897         {
898             // entry to be removed
899             uno::Reference< XDictionaryEntry >
900                     xDicEntry( aEntries[ nPos ] );
901             DBG_ASSERT(xDicEntry.is(), "lng : dictionary entry is NULL");
902 
903             aEntries.erase(aEntries.begin() + nPos);
904 
905             bRemoved = bIsModified = true;
906 
907             launchEvent( DictionaryEventFlags::DEL_ENTRY, xDicEntry );
908         }
909     }
910 
911     return bRemoved;
912 }
913 
914 sal_Bool SAL_CALL DictionaryNeo::isFull(  )
915 {
916     MutexGuard  aGuard( GetLinguMutex() );
917 
918     if (bNeedEntries)
919         loadEntries( aMainURL );
920     return aEntries.size() >= DIC_MAX_ENTRIES;
921 }
922 
923 uno::Sequence< uno::Reference< XDictionaryEntry > >
924     SAL_CALL DictionaryNeo::getEntries(  )
925 {
926     MutexGuard  aGuard( GetLinguMutex() );
927 
928     if (bNeedEntries)
929         loadEntries( aMainURL );
930     return comphelper::containerToSequence(aEntries);
931 }
932 
933 
934 void SAL_CALL DictionaryNeo::clear(  )
935 {
936     MutexGuard  aGuard( GetLinguMutex() );
937 
938     if (!bIsReadonly && !aEntries.empty())
939     {
940         // release all references to old entries
941         aEntries.clear();
942 
943         bNeedEntries = false;
944         bIsModified = true;
945 
946         launchEvent( DictionaryEventFlags::ENTRIES_CLEARED , nullptr );
947     }
948 }
949 
950 sal_Bool SAL_CALL DictionaryNeo::addDictionaryEventListener(
951             const uno::Reference< XDictionaryEventListener >& xListener )
952 {
953     MutexGuard  aGuard( GetLinguMutex() );
954 
955     bool bRes = false;
956     if (xListener.is())
957     {
958         sal_Int32   nLen = aDicEvtListeners.getLength();
959         bRes = aDicEvtListeners.addInterface( xListener ) != nLen;
960     }
961     return bRes;
962 }
963 
964 sal_Bool SAL_CALL DictionaryNeo::removeDictionaryEventListener(
965             const uno::Reference< XDictionaryEventListener >& xListener )
966 {
967     MutexGuard  aGuard( GetLinguMutex() );
968 
969     bool bRes = false;
970     if (xListener.is())
971     {
972         sal_Int32   nLen = aDicEvtListeners.getLength();
973         bRes = aDicEvtListeners.removeInterface( xListener ) != nLen;
974     }
975     return bRes;
976 }
977 
978 
979 sal_Bool SAL_CALL DictionaryNeo::hasLocation()
980 {
981     MutexGuard  aGuard( GetLinguMutex() );
982     return !aMainURL.isEmpty();
983 }
984 
985 OUString SAL_CALL DictionaryNeo::getLocation()
986 {
987     MutexGuard  aGuard( GetLinguMutex() );
988     return aMainURL;
989 }
990 
991 sal_Bool SAL_CALL DictionaryNeo::isReadonly()
992 {
993     MutexGuard  aGuard( GetLinguMutex() );
994 
995     return bIsReadonly;
996 }
997 
998 void SAL_CALL DictionaryNeo::store()
999 {
1000     MutexGuard  aGuard( GetLinguMutex() );
1001 
1002     if (bIsModified && hasLocation() && !isReadonly())
1003     {
1004         if (!saveEntries( aMainURL ))
1005             bIsModified = false;
1006     }
1007 }
1008 
1009 void SAL_CALL DictionaryNeo::storeAsURL(
1010             const OUString& aURL,
1011             const uno::Sequence< beans::PropertyValue >& /*rArgs*/ )
1012 {
1013     MutexGuard  aGuard( GetLinguMutex() );
1014 
1015     if (!saveEntries( aURL ))
1016     {
1017         aMainURL = aURL;
1018         bIsModified = false;
1019         bIsReadonly = IsReadOnly( getLocation() );
1020     }
1021 }
1022 
1023 void SAL_CALL DictionaryNeo::storeToURL(
1024             const OUString& aURL,
1025             const uno::Sequence< beans::PropertyValue >& /*rArgs*/ )
1026 {
1027     MutexGuard  aGuard( GetLinguMutex() );
1028     saveEntries(aURL);
1029 }
1030 
1031 
1032 DicEntry::DicEntry(const OUString &rDicFileWord,
1033                    bool bIsNegativWord)
1034 {
1035     if (!rDicFileWord.isEmpty())
1036         splitDicFileWord( rDicFileWord, aDicWord, aReplacement );
1037     bIsNegativ = bIsNegativWord;
1038 }
1039 
1040 DicEntry::DicEntry(OUString aDicWord_, bool bNegativ,
1041                    OUString aRplcText_) :
1042     aDicWord                (std::move(aDicWord_)),
1043     aReplacement            (std::move(aRplcText_)),
1044     bIsNegativ              (bNegativ)
1045 {
1046 }
1047 
1048 DicEntry::~DicEntry()
1049 {
1050 }
1051 
1052 void DicEntry::splitDicFileWord(const OUString &rDicFileWord,
1053                                 OUString &rDicWord,
1054                                 OUString &rReplacement)
1055 {
1056     sal_Int32 nDelimPos = rDicFileWord.indexOf( "==" );
1057     if (-1 != nDelimPos)
1058     {
1059         sal_Int32 nTriplePos = nDelimPos + 2;
1060         if (    nTriplePos < rDicFileWord.getLength()
1061             &&  rDicFileWord[ nTriplePos ] == '=' )
1062             ++nDelimPos;
1063         rDicWord     = rDicFileWord.copy( 0, nDelimPos );
1064         rReplacement = rDicFileWord.copy( nDelimPos + 2 );
1065     }
1066     else
1067     {
1068         rDicWord     = rDicFileWord;
1069         rReplacement.clear();
1070     }
1071 }
1072 
1073 OUString SAL_CALL DicEntry::getDictionaryWord(  )
1074 {
1075     return aDicWord;
1076 }
1077 
1078 sal_Bool SAL_CALL DicEntry::isNegative(  )
1079 {
1080     return bIsNegativ;
1081 }
1082 
1083 OUString SAL_CALL DicEntry::getReplacementText(  )
1084 {
1085     return aReplacement;
1086 }
1087 
1088 
1089 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
1090