Oracle全文检索方面的研究(全3)(2)
ctx_ddl.set_attribute(german_lexer,mixed_case,yes);
ctx_ddl.set_attribute(german_lexer,alternate_spelling,german);
ctx_ddl.create_preference(japanese_lexer,japanese_vgram_lexer);
ctx_ddl.create_preference(global_lexer, multi_lexer);
ctx_ddl.add_sub_lexer(global_lexer,default,english_lexer);
ctx_ddl.add_sub_lexer(global_lexer,german,german_lexer,ger);
ctx_ddl.add_sub_lexer(global_lexer,japanese,japanese_lexer,jpn);
end;
-- Build a CONTEXT index on globaldoc.text using the global_lexer preference;
-- "language column lang" names the LANG column as the per-row language selector.
-- The PARAMETERS clause takes one quoted string literal.
create index globalx on globaldoc(text) indextype is ctxsys.context
parameters ('lexer global_lexer language column lang');
3.3.3 chinese_vgram_lexer 和chinese_lexer
basic_lexer 只能识别出被空格、标点和回车符分隔出来的部分,如果要对中文内容进行索引的话,就必须使用chinese_vgram_lexer 或是chinese_lexer
Chinese_lexer 相比chinese_vgram_lexer 有如下的优点:
产生的索引更小
更好的查询响应时间
产生更接近真实的索引切词,使得查询精度更高
支持停用词
因为chinese_lexer 采用不同的算法来标记tokens,建立索引的时间要比chinese_vgram_lexer 长。
字符集:支持al32utf8, zhs16cgb231280, zhs16gbk, zhs32gb18030, zht32euc, zht16big5, zht32tris, zht16mswin950, zht16hkscs, utf8
-- Create the Chinese lexer preferences.
-- Both arguments of ctx_ddl.create_preference are string literals:
-- the preference name to create and the lexer type it is based on.
begin
  ctx_ddl.create_preference('my_chinese_vgram_lexer', 'chinese_vgram_lexer');
  ctx_ddl.create_preference('my_chinese_lexer', 'chinese_lexer');
end;
/
-- chinese_vgram_lexer: index my_lex.docs with the v-gram lexer preference, then query it.
-- The preference is referenced without an owner prefix so that it resolves to the
-- schema running this script; add an explicit "owner." prefix only if the
-- preference lives in a different schema.
create index ind_m_lex1 on my_lex(docs) indextype is ctxsys.context
  parameters ('lexer my_chinese_vgram_lexer');

select * from my_lex t where contains(docs, '中国') > 0;

-- chinese_lexer: drop the first index, rebuild with the chinese_lexer preference,
-- and run the same query for comparison.
drop index ind_m_lex1 force;

create index ind_m_lex2 on my_lex(docs) indextype is ctxsys.context
  parameters ('lexer my_chinese_lexer');

select * from my_lex t where contains(docs, '中国') > 0;
3.3.4 User_lexer
Use USER_LEXER to plug in your own language-specific lexing solution. This enables you to
define lexers for languages that are not supported by Oracle Text. It also enables you to define a
new lexer for a language that is supported but whose lexer is inappropriate for your application.
3.3.5 Default_lexer
如果数据库在建立的时候指定的是中文则default_lexer 为chinese_vgram_lexer,如果是英文,则default_lexer 为basic_lexer
3.3.6 Query_procedure
This callback stored procedure is called by Oracle Text as needed to tokenize words in the query.
A space-delimited group of characters (excluding the query operators) in the query will be
identified by Oracle Text as a word.
3.3.7 参考脚本
-- Create a basic_lexer preference.
-- Every ctx_ddl argument is a string literal and must be quoted.
begin
  ctx_ddl.create_preference('mylex', 'BASIC_LEXER');
  ctx_ddl.set_attribute('mylex', 'printjoins', '_-');   -- keep the _ and - characters
  ctx_ddl.set_attribute('mylex', 'mixed_case', 'yes');  -- case-sensitive
end;
/
create index indx_m_lex on my_lex(docs) indextype is ctxsys.context parameters(lexer
相关新闻>>
- 发表评论
-
- 最新评论 更多>>