@@ -91,7 +91,7 @@


 WHOOSH_FILESTORAGE = "FileStorage"
-INDEXES = [LATEST_REVS, ALL_REVS, LATEST_IDX]
+INDEXES = [LATEST_REVS, ALL_REVS, LATEST_META]

 VALIDATION_HANDLING_STRICT = "strict"
 VALIDATION_HANDLING_WARN = "warn"
@@ -148,7 +148,7 @@ def search_names(name_prefix, limit=None):
     :return: item names list
     """

-    idx_name = LATEST_IDX
+    idx_name = LATEST_META
     q = Prefix(NAME_EXACT, name_prefix)
     with flaskg.storage.indexer.ix[idx_name].searcher() as searcher:
         results = searcher.search(q, limit=limit)
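For readers unfamiliar with Whoosh, here is a minimal, self-contained sketch of the Prefix query pattern that search_names() uses. The in-memory index and the lowercase field name are illustrative, not moin's actual setup:

    from whoosh.fields import Schema, ID
    from whoosh.filedb.filestore import RamStorage
    from whoosh.query import Prefix

    # toy stand-in for the NAME_EXACT field of the latest-revisions index
    ix = RamStorage().create_index(Schema(name_exact=ID(stored=True)))
    with ix.writer() as writer:
        writer.add_document(name_exact="HomePage")
        writer.add_document(name_exact="HomeTown")
        writer.add_document(name_exact="Sandbox")

    q = Prefix("name_exact", "Home")  # matches every name starting with "Home"
    with ix.searcher() as searcher:
        results = searcher.search(q, limit=None)
        print([hit["name_exact"] for hit in results])  # e.g. ['HomePage', 'HomeTown']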
@@ -446,56 +446,37 @@ def __init__(self, index_storage, backend, acl_rights_contents=[], **kw):
         all_revs_fields = {ITEMID: ID(stored=True)}
         all_revs_fields.update(**common_fields)

-        # very short index for queries like has_item
-        latest_idx_fields = {
+        # Small index for the latest revisions, used for queries such as has_item, authorization checks, and
+        # the +index route. This index has no content or *NGRAM fields, which improves query speed for large wikis.
+        latest_meta_fields = {
             # ITEMID from metadata - as there is only latest rev of same item here, it is unique
             ITEMID: ID(unique=True, stored=True),
-            # namespace, so we can have different namespaces within a wiki, always check this!
             NAMESPACE: ID(stored=True),
-            # since name is a list whoosh will think it is a list of tokens see #364
-            # we store list of names, but do not use for searching
             NAME: TEXT(stored=True),
-            # string created by joining list of Name strings, we use NAMES for searching
             NAMES: TEXT(stored=True, multitoken_query="or", analyzer=item_name_analyzer(), field_boost=30.0),
-            # unmodified NAME from metadata - use this for precise lookup by the code.
-            # also needed for wildcard search, so the original string as well as the query
-            # (with the wildcard) is not cut into pieces.
             NAME_EXACT: ID(field_boost=1.0),
-            # backend name (which backend is this rev stored in?)
-            BACKENDNAME: ID(stored=True),
-            # tokenized ACL from metadata
-            ACL: TEXT(analyzer=AclTokenizer(acl_rights_contents), multitoken_query="and", stored=True),
-            # fields for route +index --------------------------------------------
-            # revision id (aka meta id)
             REVID: ID(unique=True, stored=True),
-            # sequential revision number for humans: 1, 2, 3...
             REV_NUMBER: NUMERIC(stored=True),
-            # parent revision id
             PARENTID: ID(stored=True),
-            # MTIME from revision metadata (converted to UTC datetime)
+            BACKENDNAME: ID(stored=True),
             MTIME: DATETIME(stored=True),
-            # ITEMTYPE from metadata, always matched exactly hence ID
             ITEMTYPE: ID(stored=True),
-            # tokenized CONTENTTYPE from metadata
             CONTENTTYPE: TEXT(stored=True, multitoken_query="and", analyzer=MimeTokenizer()),
-            # USERID from metadata
             USERID: ID(stored=True),
-            # ADDRESS from metadata
             ADDRESS: ID(stored=True),
-            # HOSTNAME from metadata
             HOSTNAME: ID(stored=True),
-            # SIZE from metadata
             SIZE: NUMERIC(stored=True),
+            ACL: TEXT(analyzer=AclTokenizer(acl_rights_contents), multitoken_query="and", stored=True),
         }

         latest_revisions_schema = Schema(**latest_revs_fields)
         all_revisions_schema = Schema(**all_revs_fields)
-        latest_index_schema = Schema(**latest_idx_fields)
+        latest_index_schema = Schema(**latest_meta_fields)

         # schemas are needed by query parser and for index creation
         self.schemas[ALL_REVS] = all_revisions_schema
         self.schemas[LATEST_REVS] = latest_revisions_schema
-        self.schemas[LATEST_IDX] = latest_index_schema
+        self.schemas[LATEST_META] = latest_index_schema

         # Define dynamic fields
         dynamic_fields = [
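A toy sketch of the idea behind latest_meta_fields, with the field set trimmed and lowercase names standing in for moin's constants: a metadata-only schema can answer has_item-style lookups without any content or NGRAM fields being analyzed or stored.

    from whoosh.fields import Schema, ID, NUMERIC
    from whoosh.filedb.filestore import RamStorage

    # trimmed, illustrative stand-in for latest_meta_fields
    latest_meta_fields = {
        "itemid": ID(unique=True, stored=True),
        "namespace": ID(stored=True),
        "name_exact": ID(),
        "rev_number": NUMERIC(stored=True),
    }
    ix = RamStorage().create_index(Schema(**latest_meta_fields))
    with ix.writer() as writer:
        writer.add_document(itemid="abc123", namespace="default", name_exact="Home", rev_number=1)

    with ix.searcher() as searcher:
        # searcher.document() returns the stored fields of the first match, or None
        print(searcher.document(name_exact="Home") is not None)  # True -> the item exists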
@@ -558,6 +539,15 @@ def open(self):
         for name in INDEXES:
             self.ix[name] = storage.open_index(name)

+    def missing_index_check(self):
+        """
+        Check the existence of all indexes.
+        :return: "all" if all indexes are missing, "" if none are, else a comma-separated string of missing index names
+        """
+        storage = self.get_storage()
+        missing_indexes = [name for name in INDEXES if not storage.index_exists(name)]
+        return "all" if len(missing_indexes) == len(INDEXES) else str(missing_indexes)[1:-1]
+
     def close(self):
         """
         Close all indexes.
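A sketch of the Whoosh storage calls behind the new missing_index_check() helper. The on-disk index names and the path are assumptions for illustration; only FileStorage.create() and Storage.index_exists() are real Whoosh API:

    from whoosh.filedb.filestore import FileStorage

    INDEXES = ["latest_revs", "all_revs", "latest_meta"]  # assumed on-disk index names

    storage = FileStorage("/tmp/moin-index").create()  # illustrative path; create() makes the directory
    missing = [name for name in INDEXES if not storage.index_exists(indexname=name)]
    print("all" if len(missing) == len(INDEXES) else str(missing)[1:-1])

Before any index is built, this prints "all", which is the case the method exists to detect.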
@@ -628,7 +618,7 @@ def index_revision(self, meta, content, backend_name, async_=True, force_latest=
                     == doc[REVID]
                 )
         if is_latest:
-            for idx_name in [LATEST_REVS, LATEST_IDX]:
+            for idx_name in [LATEST_REVS, LATEST_META]:
                 doc = backend_to_index(meta, content, self.schemas[idx_name], backend_name)
                 if async_:
                     writer = AsyncWriter(self.ix[idx_name])
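A minimal sketch of the AsyncWriter pattern used above: when the index is locked by another writer, AsyncWriter buffers the change and commits it from a background thread instead of failing the request. The one-field index is illustrative:

    from whoosh.fields import Schema, ID
    from whoosh.filedb.filestore import RamStorage
    from whoosh.writing import AsyncWriter

    ix = RamStorage().create_index(Schema(revid=ID(unique=True, stored=True)))
    writer = AsyncWriter(ix)
    writer.update_document(revid="0123abcd")  # upsert keyed on the unique revid field
    writer.commit()  # falls back to a delayed background commit if the index is locked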
@@ -679,7 +669,7 @@ def remove_revision(self, revid, async_=True):
         writer = self.ix[ALL_REVS].writer()
         with writer as writer:
             writer.delete_by_term(REVID, revid)
-        for idx_name in [LATEST_REVS, LATEST_IDX]:
+        for idx_name in [LATEST_REVS, LATEST_META]:
             self.remove_index_revision(revid, async_=async_, idx_name=idx_name)

     def _modify_index(self, index, schema, revids, mode="add", procs=None, limitmb=None, multisegment=False):
@@ -757,7 +747,7 @@ def rebuild(self, tmp=False, procs=None, limitmb=None, multisegment=False):
             index.close()

         # now build the indexes for latest revisions:
-        for idx_name in [LATEST_REVS, LATEST_IDX]:
+        for idx_name in [LATEST_REVS, LATEST_META]:
             index = storage.open_index(idx_name)
             try:
                 self._modify_index(
@@ -809,8 +799,8 @@ def update(self, tmp=False):
         finally:
             index_all.close()

-        # update LATEST_REVS and LATEST_IDX
-        for idx_name in [LATEST_REVS, LATEST_IDX]:
+        # update LATEST_REVS and LATEST_META
+        for idx_name in [LATEST_REVS, LATEST_META]:
             index_latest = storage.open_index(idx_name)
             try:
                 with index_latest.searcher() as searcher:
@@ -1003,7 +993,7 @@ def _document(self, idx_name=LATEST_REVS, short=False, **kw):
         Return a document matching the kw args (internal use only).
         """
         if short:
-            idx_name = LATEST_IDX
+            idx_name = LATEST_META
         with self.ix[idx_name].searcher() as searcher:
             return searcher.document(**kw)

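A hypothetical caller of the short=True fast path, assuming moin's usual flaskg request context and the NAME_EXACT/ITEMID constants from this module; the item name is made up. With short=True the lookup is routed to the small LATEST_META index, which is enough for existence checks such as the parentids lookup in the next hunk:

    # hypothetical usage; flaskg.storage.indexer is the indexing middleware
    # instance seen in search_names() above
    rev = flaskg.storage.indexer._document(short=True, **{NAME_EXACT: "Home"})
    print(rev[ITEMID] if rev else "no such item")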
@@ -1186,7 +1176,7 @@ def parentids(self):
         """
         parent_ids = set()
         for parent_name in self.parentnames:
-            rev = self.indexer._document(idx_name=LATEST_IDX, **{NAME_EXACT: parent_name})
+            rev = self.indexer._document(idx_name=LATEST_META, **{NAME_EXACT: parent_name})
             if rev:
                 parent_ids.add(rev[ITEMID])
         return parent_ids