Skip to content

Commit a15b8be

Browse files
committed
Refactoring of 3rd index code, rename index, add handling of missing indexes
1 parent d7a2985 commit a15b8be

File tree

7 files changed

+63
-59
lines changed

7 files changed

+63
-59
lines changed

src/moin/app.py

Lines changed: 28 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -169,16 +169,8 @@ class ItemNameConverter(PathConverter):
169169
clock.stop("create_app flask-cache")
170170
# init storage
171171
clock.start("create_app init backends")
172-
try:
173-
init_backends(app)
174-
except EmptyIndexError:
175-
# create-instance has no index at start and index-* subcommands check the index individually
176-
if info_name not in ["create-instance", "build-instance"] and not info_name.startswith("index-"):
177-
clock.stop("create_app init backends")
178-
clock.stop("create_app total")
179-
logging.error("Error: Wiki index not found. Try 'moin help' or 'moin --help' to get further information.")
180-
raise SystemExit(1)
181-
logging.debug("Wiki index not found.")
172+
# start init_backends
173+
_init_backends(app, info_name, clock)
182174
clock.stop("create_app init backends")
183175
clock.start("create_app flask-babel")
184176
i18n_init(app)
@@ -212,6 +204,32 @@ def destroy_app(app):
212204
deinit_backends(app)
213205

214206

207+
def _init_backends(app, info_name, clock):
208+
"""
209+
initialize the backends with exception handling
210+
"""
211+
try:
212+
init_backends(app)
213+
except EmptyIndexError:
214+
# create-instance has no index at start and index-* subcommands check the index individually
215+
if info_name not in ["create-instance", "build-instance"] and not info_name.startswith("index-"):
216+
missing_indexes = app.storage.missing_index_check()
217+
if missing_indexes == "all":
218+
logging.error(
219+
"Error: all wiki indexes missing. Try 'moin help' or 'moin --help' to get further information."
220+
)
221+
elif missing_indexes == "'latest_meta'": # TODO: remove this check after 6-12 months
222+
logging.error(
223+
"Error: Wiki index 'latest_meta' missing. Please see https://github.com/moinwiki/moin/pull/1877"
224+
)
225+
else:
226+
logging.error(f"Error: Wiki index {missing_indexes} missing, please check.")
227+
clock.stop("create_app init backends")
228+
clock.stop("create_app total")
229+
raise SystemExit(1)
230+
logging.debug("Wiki index not found.")
231+
232+
215233
def init_backends(app, create_backend=False):
216234
"""
217235
initialize the backends

src/moin/apps/frontend/views.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -585,9 +585,8 @@ def wrapper(item_name, rev):
585585
abort(404, item_name)
586586
if add_trail:
587587
flaskg.user.add_trail(item_name, aliases=item.meta.revision.fqnames)
588-
if rev == CURRENT:
589-
"""if view has been called with default rev=CURRENT we can avoid an index query in flash_if_item_deleted"""
590-
item.is_current = CURRENT
588+
"""if view has been called with default rev=CURRENT we can avoid an index query in flash_if_item_deleted"""
589+
item.is_current = rev == CURRENT
591590
return wrapped(item)
592591

593592
return wrapper
@@ -605,7 +604,7 @@ def flash_if_item_deleted(item_name, rev_id, itemrev):
605604
Show flash info message if target item is deleted, show another message if revision is deleted.
606605
Return True if item is deleted or this revision is deleted.
607606
"""
608-
rev_id = getattr(itemrev, "is_current", rev_id)
607+
rev_id = CURRENT if getattr(itemrev, "is_current", False) else rev_id
609608
if not rev_id == CURRENT:
610609
ret = False
611610
current_item = Item.create(item_name, rev_id=CURRENT)

src/moin/cli/_tests/__init__.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616

1717
from moin._tests import get_dirs
1818
from moin import log
19-
from moin.constants.keys import ALL_REVS, LATEST_IDX
19+
from moin.constants.keys import ALL_REVS, LATEST_META
2020

2121
logging = log.getLogger(__name__)
2222

@@ -134,7 +134,7 @@ def read_index_dump(out: str, latest=False):
134134
item = {}
135135
if latest and ALL_REVS in line:
136136
break
137-
if LATEST_IDX in line:
137+
if LATEST_META in line:
138138
break
139139
continue
140140
space_index = line.index(" ")

src/moin/cli/maint/index.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
from flask.cli import FlaskGroup
1414

1515
from moin.app import create_app, init_backends
16-
from moin.constants.keys import LATEST_REVS, ALL_REVS, LATEST_IDX
16+
from moin.constants.keys import LATEST_REVS, ALL_REVS, LATEST_META
1717
from moin.utils.filesys import wiki_index_exists
1818

1919

@@ -134,7 +134,7 @@ def IndexDump(tmp, truncate):
134134
logging.error(ERR_NO_INDEX)
135135
raise SystemExit(1)
136136
logging.info("Index dump started")
137-
for idx_name in [LATEST_REVS, ALL_REVS, LATEST_IDX]:
137+
for idx_name in [LATEST_REVS, ALL_REVS, LATEST_META]:
138138
print(f" {'-' * 10} {idx_name} {'-' * 60}")
139139
for kvs in app.storage.dump(tmp=tmp, idx_name=idx_name):
140140
for k, v in kvs:

src/moin/constants/keys.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -170,7 +170,7 @@
170170
# index names
171171
LATEST_REVS = "latest_revs"
172172
ALL_REVS = "all_revs"
173-
LATEST_IDX = "latest_idx"
173+
LATEST_META = "latest_meta"
174174

175175
# values for ACTION key
176176
ACTION_SAVE = "SAVE"

src/moin/items/__init__.py

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@
8383
TAGS,
8484
TEMPLATE,
8585
LATEST_REVS,
86-
LATEST_IDX,
86+
LATEST_META,
8787
EDIT_ROWS,
8888
FQNAMES,
8989
USERGROUP,
@@ -1372,10 +1372,7 @@ def get_index(self, startswith=None, selected_groups=None, regex=None, short=Fal
13721372
- one for "dirs" (direct descendents that also contain descendents)
13731373
"""
13741374
fqname = self.fqname
1375-
if short:
1376-
idx_name = LATEST_IDX
1377-
else:
1378-
idx_name = LATEST_REVS
1375+
idx_name = LATEST_META if short else LATEST_REVS
13791376
isglobalindex = not fqname.value or fqname.value == NAMESPACE_ALL
13801377
query = self.build_index_query(startswith, selected_groups, isglobalindex)
13811378
if not fqname.value.startswith(NAMESPACE_ALL + "/") and fqname.value != NAMESPACE_ALL:

src/moin/storage/middleware/indexing.py

Lines changed: 25 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,7 @@
9191

9292

9393
WHOOSH_FILESTORAGE = "FileStorage"
94-
INDEXES = [LATEST_REVS, ALL_REVS, LATEST_IDX]
94+
INDEXES = [LATEST_REVS, ALL_REVS, LATEST_META]
9595

9696
VALIDATION_HANDLING_STRICT = "strict"
9797
VALIDATION_HANDLING_WARN = "warn"
@@ -148,7 +148,7 @@ def search_names(name_prefix, limit=None):
148148
:return: item names list
149149
"""
150150

151-
idx_name = LATEST_IDX
151+
idx_name = LATEST_META
152152
q = Prefix(NAME_EXACT, name_prefix)
153153
with flaskg.storage.indexer.ix[idx_name].searcher() as searcher:
154154
results = searcher.search(q, limit=limit)
@@ -446,56 +446,37 @@ def __init__(self, index_storage, backend, acl_rights_contents=[], **kw):
446446
all_revs_fields = {ITEMID: ID(stored=True)}
447447
all_revs_fields.update(**common_fields)
448448

449-
# very short index for queries like has_item
450-
latest_idx_fields = {
449+
# Small index for the latest revisions, used for queries such as has_item, authorization checks and
450+
# the +index route. This index has no content or *NGRAMS, which improves query speed for large wikis.
451+
latest_meta_fields = {
451452
# ITEMID from metadata - as there is only latest rev of same item here, it is unique
452453
ITEMID: ID(unique=True, stored=True),
453-
# namespace, so we can have different namespaces within a wiki, always check this!
454454
NAMESPACE: ID(stored=True),
455-
# since name is a list whoosh will think it is a list of tokens see #364
456-
# we store list of names, but do not use for searching
457455
NAME: TEXT(stored=True),
458-
# string created by joining list of Name strings, we use NAMES for searching
459456
NAMES: TEXT(stored=True, multitoken_query="or", analyzer=item_name_analyzer(), field_boost=30.0),
460-
# unmodified NAME from metadata - use this for precise lookup by the code.
461-
# also needed for wildcard search, so the original string as well as the query
462-
# (with the wildcard) is not cut into pieces.
463457
NAME_EXACT: ID(field_boost=1.0),
464-
# backend name (which backend is this rev stored in?)
465-
BACKENDNAME: ID(stored=True),
466-
# tokenized ACL from metadata
467-
ACL: TEXT(analyzer=AclTokenizer(acl_rights_contents), multitoken_query="and", stored=True),
468-
# fields for route +index --------------------------------------------
469-
# revision id (aka meta id)
470458
REVID: ID(unique=True, stored=True),
471-
# sequential revision number for humans: 1, 2, 3...
472459
REV_NUMBER: NUMERIC(stored=True),
473-
# parent revision id
474460
PARENTID: ID(stored=True),
475-
# MTIME from revision metadata (converted to UTC datetime)
461+
BACKENDNAME: ID(stored=True),
476462
MTIME: DATETIME(stored=True),
477-
# ITEMTYPE from metadata, always matched exactly hence ID
478463
ITEMTYPE: ID(stored=True),
479-
# tokenized CONTENTTYPE from metadata
480464
CONTENTTYPE: TEXT(stored=True, multitoken_query="and", analyzer=MimeTokenizer()),
481-
# USERID from metadata
482465
USERID: ID(stored=True),
483-
# ADDRESS from metadata
484466
ADDRESS: ID(stored=True),
485-
# HOSTNAME from metadata
486467
HOSTNAME: ID(stored=True),
487-
# SIZE from metadata
488468
SIZE: NUMERIC(stored=True),
469+
ACL: TEXT(analyzer=AclTokenizer(acl_rights_contents), multitoken_query="and", stored=True),
489470
}
490471

491472
latest_revisions_schema = Schema(**latest_revs_fields)
492473
all_revisions_schema = Schema(**all_revs_fields)
493-
latest_index_schema = Schema(**latest_idx_fields)
474+
latest_index_schema = Schema(**latest_meta_fields)
494475

495476
# schemas are needed by query parser and for index creation
496477
self.schemas[ALL_REVS] = all_revisions_schema
497478
self.schemas[LATEST_REVS] = latest_revisions_schema
498-
self.schemas[LATEST_IDX] = latest_index_schema
479+
self.schemas[LATEST_META] = latest_index_schema
499480

500481
# Define dynamic fields
501482
dynamic_fields = [
@@ -558,6 +539,15 @@ def open(self):
558539
for name in INDEXES:
559540
self.ix[name] = storage.open_index(name)
560541

542+
def missing_index_check(self):
543+
"""
544+
check existence of all indexes.
545+
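return: "all" if every index is missing, otherwise a string of the quoted names of the missing indexes separated by ", " (empty string if none are missing)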
546+
"""
547+
storage = self.get_storage()
548+
missing_indexes = [name for name in INDEXES if not storage.index_exists(name)]
549+
return "all" if len(missing_indexes) == len(INDEXES) else str(missing_indexes)[1:-1]
550+
561551
def close(self):
562552
"""
563553
Close all indexes.
@@ -628,7 +618,7 @@ def index_revision(self, meta, content, backend_name, async_=True, force_latest=
628618
== doc[REVID]
629619
)
630620
if is_latest:
631-
for idx_name in [LATEST_REVS, LATEST_IDX]:
621+
for idx_name in [LATEST_REVS, LATEST_META]:
632622
doc = backend_to_index(meta, content, self.schemas[idx_name], backend_name)
633623
if async_:
634624
writer = AsyncWriter(self.ix[idx_name])
@@ -679,7 +669,7 @@ def remove_revision(self, revid, async_=True):
679669
writer = self.ix[ALL_REVS].writer()
680670
with writer as writer:
681671
writer.delete_by_term(REVID, revid)
682-
for idx_name in [LATEST_REVS, LATEST_IDX]:
672+
for idx_name in [LATEST_REVS, LATEST_META]:
683673
self.remove_index_revision(revid, async_=async_, idx_name=idx_name)
684674

685675
def _modify_index(self, index, schema, revids, mode="add", procs=None, limitmb=None, multisegment=False):
@@ -757,7 +747,7 @@ def rebuild(self, tmp=False, procs=None, limitmb=None, multisegment=False):
757747
index.close()
758748

759749
# now build the indexes for latest revisions:
760-
for idx_name in [LATEST_REVS, LATEST_IDX]:
750+
for idx_name in [LATEST_REVS, LATEST_META]:
761751
index = storage.open_index(idx_name)
762752
try:
763753
self._modify_index(
@@ -809,8 +799,8 @@ def update(self, tmp=False):
809799
finally:
810800
index_all.close()
811801

812-
# update LATEST_REVS and LATEST_IDX
813-
for idx_name in [LATEST_REVS, LATEST_IDX]:
802+
# update LATEST_REVS and LATEST_META
803+
for idx_name in [LATEST_REVS, LATEST_META]:
814804
index_latest = storage.open_index(idx_name)
815805
try:
816806
with index_latest.searcher() as searcher:
@@ -1003,7 +993,7 @@ def _document(self, idx_name=LATEST_REVS, short=False, **kw):
1003993
Return a document matching the kw args (internal use only).
1004994
"""
1005995
if short:
1006-
idx_name = LATEST_IDX
996+
idx_name = LATEST_META
1007997
with self.ix[idx_name].searcher() as searcher:
1008998
return searcher.document(**kw)
1009999

@@ -1186,7 +1176,7 @@ def parentids(self):
11861176
"""
11871177
parent_ids = set()
11881178
for parent_name in self.parentnames:
1189-
rev = self.indexer._document(idx_name=LATEST_IDX, **{NAME_EXACT: parent_name})
1179+
rev = self.indexer._document(idx_name=LATEST_META, **{NAME_EXACT: parent_name})
11901180
if rev:
11911181
parent_ids.add(rev[ITEMID])
11921182
return parent_ids

0 commit comments

Comments
 (0)