1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  * This file incorporates work covered by the following license notice:
10  *
11  *   Licensed to the Apache Software Foundation (ASF) under one or more
12  *   contributor license agreements. See the NOTICE file distributed
13  *   with this work for additional information regarding copyright
14  *   ownership. The ASF licenses this file to you under the Apache
15  *   License, Version 2.0 (the "License"); you may not use this file
16  *   except in compliance with the License. You may obtain a copy of
17  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
18  */
19 
20 #include <com/sun/star/uno/Reference.h>
21 #include <cppuhelper/factory.hxx>
22 #include <cppuhelper/supportsservice.hxx>
23 #include <cppuhelper/weak.hxx>
24 #include <com/sun/star/linguistic2/LinguServiceManager.hpp>
25 #include <com/sun/star/linguistic2/XLinguProperties.hpp>
26 #include <com/sun/star/linguistic2/XSpellChecker1.hpp>
27 #include <i18nlangtag/languagetag.hxx>
28 #include <tools/debug.hxx>
29 #include <comphelper/lok.hxx>
30 #include <comphelper/processfactory.hxx>
31 #include <comphelper/sequence.hxx>
32 #include <osl/mutex.hxx>
33 #include <osl/thread.h>
34 #include <unotools/lingucfg.hxx>
35 #include <unotools/resmgr.hxx>
36 
37 #include <rtl/string.hxx>
38 #include <rtl/textenc.h>
39 
40 #include <svtools/strings.hrc>
41 
42 #include "nthesimp.hxx"
43 #include <linguistic/misc.hxx>
44 #include "nthesdta.hxx"
45 
46 #include <vector>
47 #include <numeric>
48 #include <set>
49 #include <string.h>
50 
51 // XML-header to query SPELLML support
52 constexpr OUStringLiteral SPELLML_SUPPORT = u"<?xml?>";
53 
54 using namespace osl;
55 using namespace com::sun::star;
56 using namespace com::sun::star::beans;
57 using namespace com::sun::star::lang;
58 using namespace com::sun::star::uno;
59 using namespace com::sun::star::linguistic2;
60 using namespace linguistic;
61 
GetLngSvcMgr_Impl()62 static uno::Reference< XLinguServiceManager2 > GetLngSvcMgr_Impl()
63 {
64     uno::Reference< XComponentContext > xContext( comphelper::getProcessComponentContext() );
65     uno::Reference< XLinguServiceManager2 > xRes = LinguServiceManager::create( xContext ) ;
66     return xRes;
67 }
68 
Thesaurus()69 Thesaurus::Thesaurus() :
70     aEvtListeners   ( GetLinguMutex() ), pPropHelper(nullptr), bDisposing(false),
71     prevLocale(LANGUAGE_DONTKNOW)
72 {
73 }
74 
~Thesaurus()75 Thesaurus::~Thesaurus()
76 {
77     mvThesInfo.clear();
78     if (pPropHelper)
79     {
80         pPropHelper->RemoveAsPropListener();
81     }
82 }
83 
GetPropHelper_Impl()84 PropertyHelper_Thesaurus& Thesaurus::GetPropHelper_Impl()
85 {
86     if (!pPropHelper)
87     {
88         Reference< XLinguProperties >   xPropSet = GetLinguProperties();
89 
90         pPropHelper = new PropertyHelper_Thesaurus( static_cast<XThesaurus *>(this), xPropSet );
91         pPropHelper->AddAsPropListener();   //! after a reference is established
92     }
93     return *pPropHelper;
94 }
95 
getLocales()96 Sequence< Locale > SAL_CALL Thesaurus::getLocales()
97 {
98     MutexGuard  aGuard( GetLinguMutex() );
99 
100     // this routine should return the locales supported by the installed
101     // dictionaries.
102     if (mvThesInfo.empty())
103     {
104         SvtLinguConfig aLinguCfg;
105 
106         // get list of dictionaries-to-use
107         std::vector< SvtLinguConfigDictionaryEntry > aDics;
108         uno::Sequence< OUString > aFormatList;
109         aLinguCfg.GetSupportedDictionaryFormatsFor( u"Thesauri"_ustr,
110                 u"org.openoffice.lingu.new.Thesaurus"_ustr, aFormatList );
111         for (const auto& rFormat : aFormatList)
112         {
113             std::vector< SvtLinguConfigDictionaryEntry > aTmpDic(
114                     aLinguCfg.GetActiveDictionariesByFormat( rFormat ) );
115             aDics.insert( aDics.end(), aTmpDic.begin(), aTmpDic.end() );
116         }
117 
118         //!! for compatibility with old dictionaries (the ones not using extensions
119         //!! or new configuration entries, but still using the dictionary.lst file)
120         //!! Get the list of old style spell checking dictionaries to use...
121         std::vector< SvtLinguConfigDictionaryEntry > aOldStyleDics(
122                 GetOldStyleDics( "THES" ) );
123 
124         // to prefer dictionaries with configuration entries we will only
125         // use those old style dictionaries that add a language that
126         // is not yet supported by the list of new style dictionaries
127         MergeNewStyleDicsAndOldStyleDics( aDics, aOldStyleDics );
128 
129         if (!aDics.empty())
130         {
131             // get supported locales from the dictionaries-to-use...
132             std::set<OUString> aLocaleNamesSet;
133             for (auto const& dict : aDics)
134             {
135                 for (const auto& rLocaleName : dict.aLocaleNames)
136                 {
137                     if (!comphelper::LibreOfficeKit::isAllowlistedLanguage(rLocaleName))
138                         continue;
139 
140                     aLocaleNamesSet.insert( rLocaleName );
141                 }
142             }
143             // ... and add them to the resulting sequence
144             std::vector<Locale> aLocalesVec;
145             aLocalesVec.reserve(aLocaleNamesSet.size());
146 
147             std::transform(aLocaleNamesSet.begin(), aLocaleNamesSet.end(), std::back_inserter(aLocalesVec),
148                 [](const OUString& localeName) -> Locale { return LanguageTag::convertToLocale(localeName); });
149 
150             aSuppLocales = comphelper::containerToSequence(aLocalesVec);
151 
152             //! For each dictionary and each locale we need a separate entry.
153             //! If this results in more than one dictionary per locale than (for now)
154             //! it is undefined which dictionary gets used.
155             //! In the future the implementation should support using several dictionaries
156             //! for one locale.
157             sal_Int32 numthes = std::accumulate(aDics.begin(), aDics.end(), 0,
158                 [](const sal_Int32 nSum, const SvtLinguConfigDictionaryEntry& dict) {
159                     return nSum + dict.aLocaleNames.getLength(); });
160 
161             // add dictionary information
162             mvThesInfo.resize(numthes);
163 
164             sal_Int32 k = 0;
165             for (auto const& dict : aDics)
166             {
167                 if (dict.aLocaleNames.hasElements() &&
168                     dict.aLocations.hasElements())
169                 {
170                     // currently only one language per dictionary is supported in the actual implementation...
171                     // Thus here we work-around this by adding the same dictionary several times.
172                     // Once for each of its supported locales.
173                     for (const auto& rLocaleName : dict.aLocaleNames)
174                     {
175                         LanguageTag aLanguageTag(rLocaleName);
176                         mvThesInfo[k].aEncoding = RTL_TEXTENCODING_DONTKNOW;
177                         mvThesInfo[k].aLocale  = aLanguageTag.getLocale();
178                         mvThesInfo[k].aCharSetInfo.reset( new CharClass( std::move(aLanguageTag) ) );
179                         // also both files have to be in the same directory and the
180                         // file names must only differ in the extension (.aff/.dic).
181                         // Thus we use the first location only and strip the extension part.
182                         OUString aLocation = dict.aLocations[0];
183                         sal_Int32 nPos = aLocation.lastIndexOf( '.' );
184                         aLocation = aLocation.copy( 0, nPos );
185                         mvThesInfo[k].aName = aLocation;
186 
187                         ++k;
188                     }
189                 }
190             }
191             DBG_ASSERT( k == numthes, "index mismatch?" );
192         }
193         else
194         {
195             /* no dictionary found so register no dictionaries */
196             mvThesInfo.clear();
197             aSuppLocales.realloc(0);
198         }
199     }
200 
201     return aSuppLocales;
202 }
203 
hasLocale(const Locale & rLocale)204 sal_Bool SAL_CALL Thesaurus::hasLocale(const Locale& rLocale)
205 {
206     MutexGuard  aGuard( GetLinguMutex() );
207 
208     if (!aSuppLocales.hasElements())
209         getLocales();
210 
211     return comphelper::findValue(aSuppLocales, rLocale) != -1;
212 }
213 
queryMeanings(const OUString & qTerm,const Locale & rLocale,const css::uno::Sequence<css::beans::PropertyValue> & rProperties)214 Sequence < Reference < css::linguistic2::XMeaning > > SAL_CALL Thesaurus::queryMeanings(
215     const OUString& qTerm, const Locale& rLocale,
216     const css::uno::Sequence< css::beans::PropertyValue >& rProperties)
217 {
218     MutexGuard      aGuard( GetLinguMutex() );
219 
220     uno::Sequence< Reference< XMeaning > > aMeanings( 1 );
221     uno::Sequence< Reference< XMeaning > > noMeanings( 0 );
222     uno::Reference< XLinguServiceManager2 > xLngSvcMgr( GetLngSvcMgr_Impl() );
223     uno::Reference< XSpellChecker1 > xSpell;
224 
225     OUString aRTerm(qTerm);
226     OUString aPTerm(qTerm);
227     CapType ct = CapType::UNKNOWN;
228     sal_Int32 stem = 0;
229     sal_Int32 stem2 = 0;
230 
231     LanguageType nLanguage = LinguLocaleToLanguage( rLocale );
232 
233     if (LinguIsUnspecified( nLanguage) || aRTerm.isEmpty())
234         return noMeanings;
235 
236     if (!hasLocale( rLocale ))
237 #ifdef LINGU_EXCEPTIONS
238         throw( IllegalArgumentException() );
239 #else
240         return noMeanings;
241 #endif
242 
243     if (prevTerm == qTerm && prevLocale == nLanguage)
244         return prevMeanings;
245 
246     mentry * pmean = nullptr;
247     sal_Int32 nmean = 0;
248 
249     PropertyHelper_Thesaurus &rHelper = GetPropHelper();
250     rHelper.SetTmpPropVals( rProperties );
251 
252     MyThes * pTH = nullptr;
253     rtl_TextEncoding eEnc = RTL_TEXTENCODING_DONTKNOW;
254     CharClass * pCC = nullptr;
255 
256     // find the first thesaurus that matches the locale
257     for (size_t i =0; i < mvThesInfo.size(); i++)
258     {
259         if (rLocale == mvThesInfo[i].aLocale)
260         {
261             // open up and initialize this thesaurus if need be
262             if (!mvThesInfo[i].aThes)
263             {
264                 OUString datpath = mvThesInfo[i].aName + ".dat";
265                 OUString idxpath = mvThesInfo[i].aName + ".idx";
266                 OUString ndat;
267                 OUString nidx;
268                 osl::FileBase::getSystemPathFromFileURL(datpath,ndat);
269                 osl::FileBase::getSystemPathFromFileURL(idxpath,nidx);
270 
271 #if defined(_WIN32)
272                 // MyThes waits UTF-8 encoded paths with \\?\ long path prefix.
273                 OString aTmpidx = Win_AddLongPathPrefix(OUStringToOString(nidx, RTL_TEXTENCODING_UTF8));
274                 OString aTmpdat = Win_AddLongPathPrefix(OUStringToOString(ndat, RTL_TEXTENCODING_UTF8));
275 #else
276                 OString aTmpidx(OU2ENC(nidx,osl_getThreadTextEncoding()));
277                 OString aTmpdat(OU2ENC(ndat,osl_getThreadTextEncoding()));
278 #endif
279 
280                 mvThesInfo[i].aThes.reset( new MyThes(aTmpidx.getStr(),aTmpdat.getStr()) );
281                 mvThesInfo[i].aEncoding = getTextEncodingFromCharset(mvThesInfo[i].aThes->get_th_encoding());
282             }
283             pTH = mvThesInfo[i].aThes.get();
284             eEnc = mvThesInfo[i].aEncoding;
285             pCC = mvThesInfo[i].aCharSetInfo.get();
286 
287             if (pTH)
288                 break;
289         }
290     }
291 
292     // we don't want to work with a default text encoding since following incorrect
293     // results may occur only for specific text and thus may be hard to notice.
294     // Thus better always make a clean exit here if the text encoding is in question.
295     // Hopefully something not working at all will raise proper attention quickly. ;-)
296     DBG_ASSERT( eEnc != RTL_TEXTENCODING_DONTKNOW, "failed to get text encoding! (maybe incorrect encoding string in file)" );
297     if (eEnc == RTL_TEXTENCODING_DONTKNOW)
298         return noMeanings;
299 
300     while (pTH)
301     {
302         // convert word to all lower case for searching
303         if (!stem)
304             ct = capitalType(aRTerm, pCC);
305         OUString nTerm(makeLowerCase(aRTerm, pCC));
306         OString aTmp( OU2ENC(nTerm, eEnc) );
307         nmean = pTH->Lookup(aTmp.getStr(),aTmp.getLength(),&pmean);
308 
309         if (nmean)
310             aMeanings.realloc( nmean );
311 
312         mentry * pe = pmean;
313         OUString codeTerm = qTerm;
314         Reference< XSpellAlternatives > xTmpRes2;
315 
316         if (stem)
317         {
318             xTmpRes2 = xSpell->spell( "<?xml?><query type='analyze'><word>" +
319                                       aPTerm + "</word></query>", static_cast<sal_uInt16>(nLanguage), rProperties );
320             if (xTmpRes2.is())
321             {
322                 Sequence<OUString>seq = xTmpRes2->getAlternatives();
323                 if (seq.hasElements())
324                 {
325                     codeTerm = seq[0];
326                     stem2 = 1;
327                 }
328             }
329         }
330 
331         for (int j = 0; j < nmean; j++)
332         {
333             int count = pe->count;
334             if (count)
335             {
336                 Sequence< OUString > aStr( count );
337                 OUString *pStr = aStr.getArray();
338 
339                 for (int i=0; i < count; i++)
340                 {
341                     OUString sTerm(pe->psyns[i],strlen(pe->psyns[i]),eEnc );
342                     sal_Int32 catpos = sTerm.indexOf('(');
343                     OUString catst;
344                     if (catpos > 2)
345                     {
346                         // remove category name for affixation and casing
347                         catst = OUString::Concat(" ") + sTerm.subView(catpos);
348                         sTerm = sTerm.copy(0, catpos);
349                         sTerm = sTerm.trim();
350                     }
351                     // generate synonyms with affixes
352                     if (stem && stem2)
353                     {
354                         Reference< XSpellAlternatives > xTmpRes = xSpell->spell( "<?xml?><query type='generate'><word>" +
355                             sTerm + "</word>" + codeTerm + "</query>", static_cast<sal_uInt16>(nLanguage), rProperties );
356                         if (xTmpRes.is())
357                         {
358                             Sequence<OUString>seq = xTmpRes->getAlternatives();
359                             if (seq.hasElements())
360                                 sTerm = seq[0];
361                         }
362                     }
363 
364                     CapType ct1 = capitalType(sTerm, pCC);
365                     if (CapType::MIXED == ct1)
366                         ct = ct1;
367                     OUString cTerm;
368                     switch (ct)
369                     {
370                         case CapType::ALLCAP:
371                             cTerm = makeUpperCase(sTerm, pCC);
372                             break;
373                         case CapType::INITCAP:
374                             cTerm = makeInitCap(sTerm, pCC);
375                             break;
376                         default:
377                             cTerm = sTerm;
378                             break;
379                     }
380                     pStr[i] = OUString( cTerm + catst);
381                 }
382                 rtl::Reference<Meaning> pMn = new Meaning(aRTerm);
383                 OUString dTerm(pe->defn,strlen(pe->defn),eEnc );
384                 pMn->SetMeaning(dTerm);
385                 pMn->SetSynonyms(aStr);
386                 Reference<XMeaning>* pMeaning = aMeanings.getArray();
387                 pMeaning[j] = pMn;
388             }
389             pe++;
390         }
391         pTH->CleanUpAfterLookup(&pmean,nmean);
392 
393         if (nmean)
394         {
395             prevTerm = qTerm;
396             prevMeanings = aMeanings;
397             prevLocale = nLanguage;
398             return aMeanings;
399         }
400 
401         if (stem || !xLngSvcMgr.is())
402             return noMeanings;
403         stem = 1;
404 
405         xSpell.set( xLngSvcMgr->getSpellChecker(), UNO_QUERY );
406         if (!xSpell.is() || !xSpell->isValid( SPELLML_SUPPORT, static_cast<sal_uInt16>(nLanguage), rProperties ))
407             return noMeanings;
408         Reference< XSpellAlternatives > xTmpRes = xSpell->spell( "<?xml?><query type='stem'><word>" +
409             aRTerm + "</word></query>", static_cast<sal_uInt16>(nLanguage), rProperties );
410         if (xTmpRes.is())
411         {
412             Sequence<OUString>seq = xTmpRes->getAlternatives();
413             if (seq.hasElements())
414             {
415                 aRTerm = seq[0];  // XXX Use only the first stem
416                 continue;
417             }
418         }
419 
420         // stem the last word of the synonym (for categories after affixation)
421         aRTerm = aRTerm.trim();
422         sal_Int32 pos = aRTerm.lastIndexOf(' ');
423         if (!pos)
424             return noMeanings;
425         xTmpRes = xSpell->spell( OUString::Concat("<?xml?><query type='stem'><word>") +
426             aRTerm.subView(pos + 1) + "</word></query>", static_cast<sal_uInt16>(nLanguage), rProperties );
427         if (xTmpRes.is())
428         {
429             Sequence<OUString>seq = xTmpRes->getAlternatives();
430             if (seq.hasElements())
431             {
432                 aPTerm = aRTerm.copy(pos + 1);
433                 aRTerm = aRTerm.subView(0, pos + 1) + seq[0];
434 #if  0
435                 for (int i = 0; i < seq.getLength(); i++)
436                 {
437                     OString o = OUStringToOString(seq[i], RTL_TEXTENCODING_UTF8);
438                     fprintf(stderr, "%d: %s\n", i + 1, o.pData->buffer);
439                 }
440 #endif
441                 continue;
442             }
443         }
444         break;
445     }
446     return noMeanings;
447 }
448 
getServiceDisplayName(const Locale & rLocale)449 OUString SAL_CALL Thesaurus::getServiceDisplayName(const Locale& rLocale)
450 {
451     std::locale loc(Translate::Create("svt", LanguageTag(rLocale)));
452     return Translate::get(STR_DESCRIPTION_MYTHES, loc);
453 }
454 
initialize(const Sequence<Any> & rArguments)455 void SAL_CALL Thesaurus::initialize( const Sequence< Any >& rArguments )
456 {
457     MutexGuard  aGuard( GetLinguMutex() );
458 
459     if (pPropHelper)
460         return;
461 
462     sal_Int32 nLen = rArguments.getLength();
463     // Accept one of two args so we can be compatible with the call site in GetAvailLocales()
464     // linguistic module
465     if (1 == nLen || 2 == nLen)
466     {
467         Reference< XLinguProperties >   xPropSet;
468         rArguments.getConstArray()[0] >>= xPropSet;
469         assert(xPropSet);
470 
471         //! Pointer allows for access of the non-UNO functions.
472         //! And the reference to the UNO-functions while increasing
473         //! the ref-count and will implicitly free the memory
474         //! when the object is no longer used.
475         pPropHelper = new PropertyHelper_Thesaurus( static_cast<XThesaurus *>(this), xPropSet );
476         pPropHelper->AddAsPropListener();   //! after a reference is established
477     }
478     else
479         OSL_FAIL( "wrong number of arguments in sequence" );
480 }
481 
/// Returns aTerm in lower case according to pCC, or aTerm unchanged if no
/// character classification is given.
OUString Thesaurus::makeLowerCase(const OUString& aTerm, CharClass const * pCC)
{
    return pCC ? pCC->lowercase(aTerm) : aTerm;
}
488 
/// Returns aTerm in upper case according to pCC, or aTerm unchanged if no
/// character classification is given.
OUString Thesaurus::makeUpperCase(const OUString& aTerm, CharClass const * pCC)
{
    return pCC ? pCC->uppercase(aTerm) : aTerm;
}
495 
/// Returns aTerm with its first code unit upper-cased and the remainder
/// lower-cased according to pCC.  An empty term, or a call without
/// character classification, yields aTerm unchanged.
OUString Thesaurus::makeInitCap(const OUString& aTerm, CharClass const * pCC)
{
    const sal_Int32 nLen = aTerm.getLength();
    if (!pCC || nLen == 0)
        return aTerm;

    const OUString aFirst = aTerm.copy(0,1);
    if (nLen == 1)
        return pCC->uppercase(aFirst, 0, 1);

    return ( pCC->uppercase(aFirst, 0, 1)
             + pCC->lowercase(aTerm, 1, nLen - 1) );
}
512 
dispose()513 void SAL_CALL Thesaurus::dispose()
514 {
515     MutexGuard  aGuard( GetLinguMutex() );
516 
517     if (!bDisposing)
518     {
519         bDisposing = true;
520         EventObject aEvtObj( static_cast<XThesaurus *>(this) );
521         aEvtListeners.disposeAndClear( aEvtObj );
522         if (pPropHelper)
523         {
524             pPropHelper->RemoveAsPropListener();
525             delete pPropHelper;
526             pPropHelper = nullptr;
527         }
528     }
529 }
530 
addEventListener(const Reference<XEventListener> & rxListener)531 void SAL_CALL Thesaurus::addEventListener( const Reference< XEventListener >& rxListener )
532 {
533     MutexGuard  aGuard( GetLinguMutex() );
534 
535     if (!bDisposing && rxListener.is())
536         aEvtListeners.addInterface( rxListener );
537 }
538 
removeEventListener(const Reference<XEventListener> & rxListener)539 void SAL_CALL Thesaurus::removeEventListener( const Reference< XEventListener >& rxListener )
540 {
541     MutexGuard  aGuard( GetLinguMutex() );
542 
543     if (!bDisposing && rxListener.is())
544         aEvtListeners.removeInterface( rxListener );
545 }
546 
547 // Service specific part
getImplementationName()548 OUString SAL_CALL Thesaurus::getImplementationName()
549 {
550     return u"org.openoffice.lingu.new.Thesaurus"_ustr;
551 }
552 
supportsService(const OUString & ServiceName)553 sal_Bool SAL_CALL Thesaurus::supportsService( const OUString& ServiceName )
554 {
555     return cppu::supportsService(this, ServiceName);
556 }
557 
getSupportedServiceNames()558 Sequence< OUString > SAL_CALL Thesaurus::getSupportedServiceNames()
559 {
560     return { SN_THESAURUS };
561 }
562 
563 extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface*
lingucomponent_Thesaurus_get_implementation(css::uno::XComponentContext *,css::uno::Sequence<css::uno::Any> const &)564 lingucomponent_Thesaurus_get_implementation(
565     css::uno::XComponentContext* , css::uno::Sequence<css::uno::Any> const&)
566 {
567     return cppu::acquire(new Thesaurus());
568 }
569 
570 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
571