Commit 29f65832 authored by Kazuhiko Shiozaki's avatar Kazuhiko Shiozaki

more delailed sphinx.conf template using English stemming rule for now.

parent f0060584
......@@ -38,6 +38,57 @@ index erp5
# rt_attr_timestamp = ts_added
# rt_attr_string = author
# document attribute values (docinfo) storage mode
# optional, default is 'extern'
# known values are 'none', 'extern' and 'inline'
# docinfo = extern
# memory locking for cached data (.spa and .spi), to prevent swapping
# optional, default is 0 (do not mlock)
# requires searchd to be run from root
# mlock = 0
# a list of morphology preprocessors to apply
# optional, default is empty
#
# builtin preprocessors are 'none', 'stem_en', 'stem_ru', 'stem_enru',
# 'soundex', and 'metaphone'; additional preprocessors available from
# libstemmer are 'libstemmer_XXX', where XXX is algorithm code
# (see libstemmer_c/libstemmer/modules.txt)
#
# morphology = stem_en, stem_ru, soundex
# morphology = libstemmer_german
# morphology = libstemmer_sv
morphology = stem_en
# minimum word length at which to enable stemming
# optional, default is 1 (stem everything)
#
# min_stemming_len = 1
# stopword files list (space separated)
# optional, default is empty
# contents are plain text, charset_table and stemming are both applied
#
# stopwords = %(data_directory)s/erp5/stopwords.txt
# wordforms file, in "mapfrom > mapto" plain text format
# optional, default is empty
#
# wordforms = %(data_directory)s/erp5/wordforms.txt
# tokenizing exceptions file
# optional, default is empty
#
# plain text, case sensitive, space insensitive in map-from part
# one "Map Several Words => ToASingleOne" entry per line
#
# exceptions = %(data_directory)s/erp5/exceptions.txt
# minimum indexed word length
# default is 1 (index everything)
min_word_len = 1
# charset encoding type
# optional, default is 'sbcs'
# known types are 'sbcs' (Single Byte CharSet) and 'utf-8'
......@@ -245,6 +296,39 @@ index erp5
U+0B92, U+0B93, U+0B95, U+0B99, U+0B9A, U+0B9C, U+0B9E, U+0B9F, U+0BA3, U+0BA4, U+0BA8..U+0BAA, U+0BAE..U+0BB9, U+0BE6..U+0BEF, U+0E01..U+0E30, U+0E32, U+0E33, U+0E40..U+0E46, U+0E50..U+0E5B, U+FF10..U+FF19->0..9, U+FF21..U+FF3A->a..z, U+FF41..U+FF5A->a..z, \
0..9, A..Z->a..z, a..z
# ignored characters list
# optional, default value is empty
#
# ignore_chars = U+00AD
# minimum word prefix length to index
# optional, default is 0 (do not index prefixes)
#
# min_prefix_len = 0
# minimum word infix length to index
# optional, default is 0 (do not index infixes)
#
# min_infix_len = 0
# list of fields to limit prefix/infix indexing to
# optional, default value is empty (index all fields in prefix/infix mode)
#
# prefix_fields = filename
# infix_fields = url, domain
# enable star-syntax (wildcards) when searching prefix/infix indexes
# search-time only, does not affect indexing, can be 0 or 1
# optional, default is 0 (do not use wildcard syntax)
#
# enable_star = 1
# expand keywords with exact forms and/or stars when searching fit indexes
# search-time only, does not affect indexing, can be 0 or 1
# optional, default is 0 (do not expand keywords)
#
# expand_keywords = 1
# n-gram length to index, for CJK indexing
# only supports 0 and 1 for now, other lengths to be implemented
# optional, default is 0 (disable n-grams)
......@@ -255,6 +339,104 @@ index erp5
# optional, default is empty
#
ngram_chars = U+4E00..U+9FBB, U+3400..U+4DB5, U+20000..U+2A6D6, U+FA0E, U+FA0F, U+FA11, U+FA13, U+FA14, U+FA1F, U+FA21, U+FA23, U+FA24, U+FA27, U+FA28, U+FA29, U+3105..U+312C, U+31A0..U+31B7, U+3041, U+3043, U+3045, U+3047, U+3049, U+304B, U+304D, U+304F, U+3051, U+3053, U+3055, U+3057, U+3059, U+305B, U+305D, U+305F, U+3061, U+3063, U+3066, U+3068, U+306A..U+306F, U+3072, U+3075, U+3078, U+307B, U+307E..U+3083, U+3085, U+3087, U+3089..U+308E, U+3090..U+3093, U+30A1, U+30A3, U+30A5, U+30A7, U+30A9, U+30AD, U+30AF, U+30B3, U+30B5, U+30BB, U+30BD, U+30BF, U+30C1, U+30C3, U+30C4, U+30C6, U+30CA, U+30CB, U+30CD, U+30CE, U+30DE, U+30DF, U+30E1, U+30E2, U+30E3, U+30E5, U+30E7, U+30EE, U+30F0..U+30F3, U+30F5, U+30F6, U+31F0, U+31F1, U+31F2, U+31F3, U+31F4, U+31F5, U+31F6, U+31F7, U+31F8, U+31F9, U+31FA, U+31FB, U+31FC, U+31FD, U+31FE, U+31FF, U+AC00..U+D7A3, U+1100..U+1159, U+1161..U+11A2, U+11A8..U+11F9, U+A000..U+A48C, U+A492..U+A4C6
# phrase boundary characters list
# optional, default is empty
#
# phrase_boundary = ., ?, !, U+2026 # horizontal ellipsis
# phrase boundary word position increment
# optional, default is 0
#
# phrase_boundary_step = 100
# blended characters list
# blended chars are indexed both as separators and valid characters
# for instance, AT&T will results in 3 tokens ("at", "t", and "at&t")
# optional, default is empty
#
# blend_chars = +, &, U+23
# blended token indexing mode
# a comma separated list of blended token indexing variants
# known variants are trim_none, trim_head, trim_tail, trim_both, skip_pure
# optional, default is trim_none
#
# blend_mode = trim_tail, skip_pure
# whether to strip HTML tags from incoming documents
# known values are 0 (do not strip) and 1 (do strip)
# optional, default is 0
html_strip = 0
# what HTML attributes to index if stripping HTML
# optional, default is empty (do not index anything)
#
# html_index_attrs = img=alt,title; a=title;
# what HTML elements contents to strip
# optional, default is empty (do not strip element contents)
#
# html_remove_elements = style, script
# whether to preopen index data files on startup
# optional, default is 0 (do not preopen), searchd-only
#
# preopen = 1
# whether to keep dictionary (.spi) on disk, or cache it in RAM
# optional, default is 0 (cache in RAM), searchd-only
#
# ondisk_dict = 1
# whether to enable in-place inversion (2x less disk, 90-95% speed)
# optional, default is 0 (use separate temporary files), indexer-only
#
# inplace_enable = 1
# in-place fine-tuning options
# optional, defaults are listed below
#
# inplace_hit_gap = 0 # preallocated hitlist gap size
# inplace_docinfo_gap = 0 # preallocated docinfo gap size
# inplace_reloc_factor = 0.1 # relocation buffer size within arena
# inplace_write_factor = 0.1 # write buffer size within arena
# whether to index original keywords along with stemmed versions
# enables "=exactform" operator to work
# optional, default is 0
#
# index_exact_words = 1
# position increment on overshort (less that min_word_len) words
# optional, allowed values are 0 and 1, default is 1
#
# overshort_step = 1
# position increment on stopword
# optional, allowed values are 0 and 1, default is 1
#
# stopword_step = 1
# hitless words list
# positions for these keywords will not be stored in the index
# optional, allowed values are 'all', or a list file name
#
# hitless_words = all
# hitless_words = hitless.txt
# detect and index sentence and paragraph boundaries
# required for the SENTENCE and PARAGRAPH operators to work
# optional, allowed values are 0 and 1, default is 0
#
# index_sp = 1
# index zones, delimited by HTML/XML tags
# a comma separated list of tags and wildcards
# required for the ZONE operator to work
# optional, default is empty string (do not index zones)
#
# index_zones = title, h*, th
}
#############################################################################
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment