@@ -117,10 +117,7 @@ def word_filter(self, word: str) -> bool:
117
117
"""Return true if the target word should be registered in the search index.
118
118
This method is called after stemming.
119
119
"""
120
- return len (word ) == 0 or not (
121
- ((len (word ) < 3 ) and (12353 < ord (word [0 ]) < 12436 ))
122
- or (ord (word [0 ]) < 256 and (word in self .stopwords ))
123
- )
120
+ return not word .isdigit () and word not in self .stopwords
124
121
125
122
126
123
# SearchEnglish imported after SearchLanguage is defined due to circular import
@@ -583,17 +580,17 @@ def get_js_stemmer_rawcode(self) -> str | None:
583
580
584
581
def get_js_stemmer_code(self) -> str:
    """Return the JS stemmer code to insert into language_data.js.

    When the language declares no ``js_stemmer_rawcode`` file, the
    language's own ``js_stemmer_code`` is returned unchanged.  Otherwise
    the shared ``base-stemmer.js`` and the language-specific stemmer file
    are read (as UTF-8) from ``_MINIFIED_JS_PATH`` and joined with
    newlines, followed by a statement that assigns
    ``<language_name>Stemmer`` to ``window.Stemmer``.
    """
    if not self.lang.js_stemmer_rawcode:
        return self.lang.js_stemmer_code

    base_js_path = _MINIFIED_JS_PATH / 'base-stemmer.js'
    language_js_path = _MINIFIED_JS_PATH / self.lang.js_stemmer_rawcode
    return '\n'.join((
        base_js_path.read_text(encoding='utf-8'),
        language_js_path.read_text(encoding='utf-8'),
        # No space between the language name and ``Stemmer``: together
        # they form a single JS identifier such as ``EnglishStemmer``.
        f'window.Stemmer = {self.lang.language_name}Stemmer;',
    ))
593
+
597
594
598
595
def _feed_visit_nodes (
599
596
node : nodes .Node ,
0 commit comments