@@ -91,7 +91,7 @@


 WHOOSH_FILESTORAGE = "FileStorage"
-INDEXES = [LATEST_REVS, ALL_REVS, LATEST_IDX]
+INDEXES = [LATEST_REVS, ALL_REVS, LATEST_META]

 VALIDATION_HANDLING_STRICT = "strict"
 VALIDATION_HANDLING_WARN = "warn"
@@ -148,7 +148,7 @@ def search_names(name_prefix, limit=None):
     :return: item names list
     """

-    idx_name = LATEST_IDX
+    idx_name = LATEST_META
     q = Prefix(NAME_EXACT, name_prefix)
     with flaskg.storage.indexer.ix[idx_name].searcher() as searcher:
         results = searcher.search(q, limit=limit)
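For readers unfamiliar with Whoosh, here is a minimal, self-contained sketch of the Prefix query pattern that search_names() uses. The in-memory index and the lowercase field name are illustrative, not moin's actual setup:

    from whoosh.fields import Schema, ID
    from whoosh.filedb.filestore import RamStorage
    from whoosh.query import Prefix

    # toy stand-in for the NAME_EXACT field of the latest-revisions index
    ix = RamStorage().create_index(Schema(name_exact=ID(stored=True)))
    with ix.writer() as writer:
        writer.add_document(name_exact="HomePage")
        writer.add_document(name_exact="HomeTown")
        writer.add_document(name_exact="Sandbox")

    q = Prefix("name_exact", "Home")  # matches every name starting with "Home"
    with ix.searcher() as searcher:
        results = searcher.search(q, limit=None)
        print([hit["name_exact"] for hit in results])  # e.g. ['HomePage', 'HomeTown']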
@@ -446,56 +446,37 @@ def __init__(self, index_storage, backend, acl_rights_contents=[], **kw):
         all_revs_fields = {ITEMID: ID(stored=True)}
         all_revs_fields.update(**common_fields)

-        # very short index for queries like has_item
-        latest_idx_fields = {
+        # Small index for the latest revisions, used for queries such as has_item, authorization checks, and
+        # the +index route. This index has no content or *NGRAM fields, which improves query speed for large wikis.
+        latest_meta_fields = {
             # ITEMID from metadata - as there is only latest rev of same item here, it is unique
             ITEMID: ID(unique=True, stored=True),
-            # namespace, so we can have different namespaces within a wiki, always check this!
             NAMESPACE: ID(stored=True),
-            # since name is a list whoosh will think it is a list of tokens see #364
-            # we store list of names, but do not use for searching
             NAME: TEXT(stored=True),
-            # string created by joining list of Name strings, we use NAMES for searching
             NAMES: TEXT(stored=True, multitoken_query="or", analyzer=item_name_analyzer(), field_boost=30.0),
-            # unmodified NAME from metadata - use this for precise lookup by the code.
-            # also needed for wildcard search, so the original string as well as the query
-            # (with the wildcard) is not cut into pieces.
             NAME_EXACT: ID(field_boost=1.0),
-            # backend name (which backend is this rev stored in?)
-            BACKENDNAME: ID(stored=True),
-            # tokenized ACL from metadata
-            ACL: TEXT(analyzer=AclTokenizer(acl_rights_contents), multitoken_query="and", stored=True),
-            # fields for route +index --------------------------------------------
-            # revision id (aka meta id)
             REVID: ID(unique=True, stored=True),
-            # sequential revision number for humans: 1, 2, 3...
             REV_NUMBER: NUMERIC(stored=True),
-            # parent revision id
             PARENTID: ID(stored=True),
-            # MTIME from revision metadata (converted to UTC datetime)
+            BACKENDNAME: ID(stored=True),
             MTIME: DATETIME(stored=True),
-            # ITEMTYPE from metadata, always matched exactly hence ID
             ITEMTYPE: ID(stored=True),
-            # tokenized CONTENTTYPE from metadata
             CONTENTTYPE: TEXT(stored=True, multitoken_query="and", analyzer=MimeTokenizer()),
-            # USERID from metadata
             USERID: ID(stored=True),
-            # ADDRESS from metadata
             ADDRESS: ID(stored=True),
-            # HOSTNAME from metadata
             HOSTNAME: ID(stored=True),
-            # SIZE from metadata
             SIZE: NUMERIC(stored=True),
+            ACL: TEXT(analyzer=AclTokenizer(acl_rights_contents), multitoken_query="and", stored=True),
         }

         latest_revisions_schema = Schema(**latest_revs_fields)
         all_revisions_schema = Schema(**all_revs_fields)
-        latest_index_schema = Schema(**latest_idx_fields)
+        latest_index_schema = Schema(**latest_meta_fields)

         # schemas are needed by query parser and for index creation
         self.schemas[ALL_REVS] = all_revisions_schema
         self.schemas[LATEST_REVS] = latest_revisions_schema
-        self.schemas[LATEST_IDX] = latest_index_schema
+        self.schemas[LATEST_META] = latest_index_schema

         # Define dynamic fields
         dynamic_fields = [
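A toy sketch of the idea behind latest_meta_fields, with the field set trimmed and lowercase names standing in for moin's constants: a metadata-only schema can answer has_item-style lookups without any content or NGRAM fields being analyzed or stored.

    from whoosh.fields import Schema, ID, NUMERIC
    from whoosh.filedb.filestore import RamStorage

    # trimmed, illustrative stand-in for latest_meta_fields
    latest_meta_fields = {
        "itemid": ID(unique=True, stored=True),
        "namespace": ID(stored=True),
        "name_exact": ID(),
        "rev_number": NUMERIC(stored=True),
    }
    ix = RamStorage().create_index(Schema(**latest_meta_fields))
    with ix.writer() as writer:
        writer.add_document(itemid="abc123", namespace="default", name_exact="Home", rev_number=1)

    with ix.searcher() as searcher:
        # searcher.document() returns the stored fields of the first match, or None
        print(searcher.document(name_exact="Home") is not None)  # True -> the item exists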
@@ -558,6 +539,15 @@ def open(self):
         for name in INDEXES:
             self.ix[name] = storage.open_index(name)

+    def missing_index_check(self):
+        """
+        Check the existence of all indexes.
+        :return: "all" if all indexes are missing, "" if none are, else a comma-separated string of missing index names
+        """
+        storage = self.get_storage()
+        missing_indexes = [name for name in INDEXES if not storage.index_exists(name)]
+        return "all" if len(missing_indexes) == len(INDEXES) else str(missing_indexes)[1:-1]
+
     def close(self):
         """
         Close all indexes.
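A sketch of the Whoosh storage calls behind the new missing_index_check() helper. The on-disk index names and the path are assumptions for illustration; only FileStorage.create() and Storage.index_exists() are real Whoosh API:

    from whoosh.filedb.filestore import FileStorage

    INDEXES = ["latest_revs", "all_revs", "latest_meta"]  # assumed on-disk index names

    storage = FileStorage("/tmp/moin-index").create()  # illustrative path; create() makes the directory
    missing = [name for name in INDEXES if not storage.index_exists(indexname=name)]
    print("all" if len(missing) == len(INDEXES) else str(missing)[1:-1])

Before any index is built, this prints "all", which is the case the method exists to detect.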
@@ -628,7 +618,7 @@ def index_revision(self, meta, content, backend_name, async_=True, force_latest=
                     == doc[REVID]
                 )
         if is_latest:
-            for idx_name in [LATEST_REVS, LATEST_IDX]:
+            for idx_name in [LATEST_REVS, LATEST_META]:
                 doc = backend_to_index(meta, content, self.schemas[idx_name], backend_name)
                 if async_:
                     writer = AsyncWriter(self.ix[idx_name])
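A minimal sketch of the AsyncWriter pattern used above: when the index is locked by another writer, AsyncWriter buffers the change and commits it from a background thread instead of failing the request. The one-field index is illustrative:

    from whoosh.fields import Schema, ID
    from whoosh.filedb.filestore import RamStorage
    from whoosh.writing import AsyncWriter

    ix = RamStorage().create_index(Schema(revid=ID(unique=True, stored=True)))
    writer = AsyncWriter(ix)
    writer.update_document(revid="0123abcd")  # upsert keyed on the unique revid field
    writer.commit()  # falls back to a delayed background commit if the index is locked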
@@ -679,7 +669,7 @@ def remove_revision(self, revid, async_=True):
         writer = self.ix[ALL_REVS].writer()
         with writer as writer:
             writer.delete_by_term(REVID, revid)
-        for idx_name in [LATEST_REVS, LATEST_IDX]:
+        for idx_name in [LATEST_REVS, LATEST_META]:
             self.remove_index_revision(revid, async_=async_, idx_name=idx_name)

     def _modify_index(self, index, schema, revids, mode="add", procs=None, limitmb=None, multisegment=False):
@@ -757,7 +747,7 @@ def rebuild(self, tmp=False, procs=None, limitmb=None, multisegment=False):
             index.close()

         # now build the indexes for latest revisions:
-        for idx_name in [LATEST_REVS, LATEST_IDX]:
+        for idx_name in [LATEST_REVS, LATEST_META]:
             index = storage.open_index(idx_name)
             try:
                 self._modify_index(
@@ -809,8 +799,8 @@ def update(self, tmp=False):
         finally:
             index_all.close()

-        # update LATEST_REVS and LATEST_IDX
-        for idx_name in [LATEST_REVS, LATEST_IDX]:
+        # update LATEST_REVS and LATEST_META
+        for idx_name in [LATEST_REVS, LATEST_META]:
             index_latest = storage.open_index(idx_name)
             try:
                 with index_latest.searcher() as searcher:
@@ -1003,7 +993,7 @@ def _document(self, idx_name=LATEST_REVS, short=False, **kw):
         Return a document matching the kw args (internal use only).
         """
         if short:
-            idx_name = LATEST_IDX
+            idx_name = LATEST_META
         with self.ix[idx_name].searcher() as searcher:
             return searcher.document(**kw)

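A hypothetical caller of the short=True fast path, assuming moin's usual flaskg request context and the NAME_EXACT/ITEMID constants from this module; the item name is made up. With short=True the lookup is routed to the small LATEST_META index, which is enough for existence checks such as the parentids lookup in the next hunk:

    # hypothetical usage; flaskg.storage.indexer is the indexing middleware
    # instance seen in search_names() above
    rev = flaskg.storage.indexer._document(short=True, **{NAME_EXACT: "Home"})
    print(rev[ITEMID] if rev else "no such item")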
@@ -1186,7 +1176,7 @@ def parentids(self):
         """
         parent_ids = set()
         for parent_name in self.parentnames:
-            rev = self.indexer._document(idx_name=LATEST_IDX, **{NAME_EXACT: parent_name})
+            rev = self.indexer._document(idx_name=LATEST_META, **{NAME_EXACT: parent_name})
             if rev:
                 parent_ids.add(rev[ITEMID])
         return parent_ids