Applies To
All Users
Oracle Text - Version 9.2.0.1 and later
Summary
When an Oracle Text index is created with a MULTI_COLUMN_DATASTORE preference, searching for one of the column names returns every record.
SQL> connect textuser/textuser
-- Demo table: CLIENT and ADDRESS hold the text; CTX_VALUE is the column the
-- Text index is created on and queried through.
create table TEST_MULTI_COLUMN
(Id Number(7)
,CLIENT varchar2(10 char)
,ADDRESS varchar2(10 char)
,CTX_VALUE varchar2(1)) -- column used for the Text index and for CONTAINS queries; a placeholder (pseudo) column
/
Table created.
insert into TEST_MULTI_COLUMN
(ID, CLIENT, ADDRESS, CTX_VALUE)
values
(1, 'ORACLE', Null, Null)
/
1 row created.
Commit
/
Commit complete.
select * from TEST_MULTI_COLUMN;
ID CLIENT ADDRESS C
1 ORACLE
begin
ctx_ddl.create_preference('TEST_MULTI', 'MULTI_COLUMN_DATASTORE');
ctx_ddl.set_attribute('TEST_MULTI', 'columns', 'CLIENT,ADDRESS');
end;
/
PL/SQL procedure successfully completed.
CREATE INDEX TEST_MULTI_COL ON TEST_MULTI_COLUMN(CTX_VALUE)
INDEXTYPE IS CTXSYS.CONTEXT
PARAMETERS ('datastore TEST_MULTI
filter ctxsys.null_filter
stoplist ctxsys.EMPTY_STOPLIST
memory 200M')
/
Index created.
-- this should return 1 record
select * from TEST_MULTI_COLUMN t
where contains(t.ctx_value, 'oracle') > 0
/
ID CLIENT ADDRESS C
1 ORACLE
-- this should return NO records
select * from TEST_MULTI_COLUMN t
where contains(t.ctx_value, 'client') > 0
/
ID CLIENT ADDRESS C
1 ORACLE
-- this should return no records
select * from TEST_MULTI_COLUMN t
where contains(t.ctx_value, 'address') > 0
/
ID CLIENT ADDRESS C
1 ORACLE
Solution
Since 10g, the new DELIMITER attribute can be set to COLUMN_NAME_TAG, which preserves this behavior, or NEWLINE, which uses a newline instead of these tags. NEWLINE can be used in situations where the tags are not needed or may be confusing -- columns with XML fragments which are already tagged, or a single value split into multiple columns (like primary-detail, but horizontally).
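To avoid this behavior, set the DELIMITER attribute to NEWLINE, or index with a section group (for example, the basic section group) so that the column tags are treated as section markers rather than as text.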
Note:
The multi-column datastore is designed to be used in conjunction with section searching -- preferably field sections.
begin
ctx_ddl.set_attribute('TEST_MULTI','delimiter','NEWLINE');
end;
/
PL/SQL procedure successfully completed.
drop index TEST_MULTI_COL;
Index dropped.
CREATE INDEX TEST_MULTI_COL ON TEST_MULTI_COLUMN(CTX_VALUE)
INDEXTYPE IS CTXSYS.CONTEXT
PARAMETERS ('datastore TEST_MULTI
filter ctxsys.null_filter
stoplist ctxsys.EMPTY_STOPLIST
memory 200M')
/
Index created.
-- this should return 1 record
select * from TEST_MULTI_COLUMN t
where contains(t.ctx_value, 'oracle') > 0
/
ID CLIENT ADDRESS C
1 ORACLE
-- this should return no records
select * from TEST_MULTI_COLUMN t
where contains(t.ctx_value, 'client') > 0
/
no rows selected
Cause
This is expected behavior. By default, the DELIMITER attribute of MULTI_COLUMN_DATASTORE is set to COLUMN_NAME_TAG.
The multi-column datastore formats the multiple columns into a semistructured document, producing a virtual sectioned document like this:
<COLUMN1>
column1 contents -- note: the tag is indexed too, so this is stored as "client oracle" rather than just "oracle"
</COLUMN1>
<COLUMN2>
column2 contents
</COLUMN2>
...
Using the example above, coming out of the datastore will be:
<CLIENT>
Oracle
</CLIENT>
<ADDRESS>
</ADDRESS>
So the column names are part of the document text and, in the absence of a sectioner, those tags are treated as text and indexed as ordinary tokens:
-- List the tokens stored in the index's token table. Oracle Text names this
-- table DR$<index_name>$I; the "$" characters are part of the name.
select token_text, token_type from dr$TEST_MULTI_COL$I;
TOKEN_TEXT TOKEN_TYPE
ADDRESS 0
CLIENT 0
ORACLE 0
References
MOS document id: 1364654.1
Product Versions
product: Oracle Text - min_version: 9.2.0.1 - max_version: none; Information in this article applies to GENERIC (All Platforms)
Article Feedback
-- Scratch re-run: inspect the table, drop the old index, then rebuild it with
-- an explicit indexing memory size (536870912 = 512 * 1024 * 1024).
select * from TEST_MULTI_COLUMN;
-- The index name is left unqualified so it resolves in the current schema.
drop index TEST_MULTI_COL;
begin
ctx_ddl.create_preference('TEST_MULTI', 'MULTI_COLUMN_DATASTORE');
ctx_ddl.set_attribute('TEST_MULTI', 'columns', 'CLIENT,ADDRESS');
end;
/
-- No SYNC clause here, so the memory value must not be followed by ")".
CREATE INDEX TEST_MULTI_COL ON TEST_MULTI_COLUMN(CTX_VALUE)
INDEXTYPE IS CTXSYS.CONTEXT
PARAMETERS ('datastore TEST_MULTI
filter ctxsys.null_filter
stoplist ctxsys.EMPTY_STOPLIST
memory 536870912')
/
select ctx_report.create_index_script('TEST_MULTI_COL') from dual;
-- Preference definitions for index TEST_MULTI_COL; presumably the script
-- returned by the ctx_report.create_index_script query just before this
-- block (TODO confirm).

-- Datastore: index CLIENT and ADDRESS together, using NEWLINE as the
-- delimiter instead of column-name tags.
begin
ctx_ddl.create_preference('"TEST_MULTI_COL_DST"','MULTI_COLUMN_DATASTORE');
ctx_ddl.set_attribute('"TEST_MULTI_COL_DST"','COLUMNS','CLIENT,ADDRESS');
ctx_ddl.set_attribute('"TEST_MULTI_COL_DST"','DELIMITER','NEWLINE');
end;
/
-- Filter: NULL_FILTER preference, no attributes set.
begin
ctx_ddl.create_preference('"TEST_MULTI_COL_FIL"','NULL_FILTER');
end;
/
-- Section group: NULL_SECTION_GROUP.
begin
ctx_ddl.create_section_group('"TEST_MULTI_COL_SGP"','NULL_SECTION_GROUP');
end;
/
-- Lexer: BASIC_LEXER, no attributes set.
begin
ctx_ddl.create_preference('"TEST_MULTI_COL_LEX"','BASIC_LEXER');
end;
/
-- Wordlist: BASIC_WORDLIST with STEMMER = ENGLISH and FUZZY_MATCH = GENERIC.
begin
ctx_ddl.create_preference('"TEST_MULTI_COL_WDL"','BASIC_WORDLIST');
ctx_ddl.set_attribute('"TEST_MULTI_COL_WDL"','STEMMER','ENGLISH');
ctx_ddl.set_attribute('"TEST_MULTI_COL_WDL"','FUZZY_MATCH','GENERIC');
end;
/
-- Storage: BASIC_STORAGE with R_TABLE_CLAUSE and I_INDEX_CLAUSE set.
begin
ctx_ddl.create_preference('"TEST_MULTI_COL_STO"','BASIC_STORAGE');
ctx_ddl.set_attribute('"TEST_MULTI_COL_STO"','R_TABLE_CLAUSE','lob (data) store as (cache)');
ctx_ddl.set_attribute('"TEST_MULTI_COL_STO"','I_INDEX_CLAUSE','compress 2');
end;
/
-- Index DDL for TEST_MULTI_COL built from the named preferences.
-- The empty schema qualifier ("".) is not a legal identifier, so the index and
-- table names are left unqualified and resolve in the current schema.
-- Note: these parameters no longer contain a MEMORY setting.
create index "TEST_MULTI_COL"
on "TEST_MULTI_COLUMN"
("CTX_VALUE")
indextype is ctxsys.context
parameters('
datastore "TEST_MULTI_COL_DST"
filter "TEST_MULTI_COL_FIL"
section group "TEST_MULTI_COL_SGP"
lexer "TEST_MULTI_COL_LEX"
wordlist "TEST_MULTI_COL_WDL"
stoplist "CTXSYS"."EMPTY_STOPLIST"
storage "TEST_MULTI_COL_STO"
fast_dml
')
/
-- Drop the index and rebuild it with an interval-based SYNC clause
-- (the interval expression is SYSDATE + 10 minutes).
drop index TEST_MULTI_COL
/
CREATE INDEX TEST_MULTI_COL ON TEST_MULTI_COLUMN(CTX_VALUE)
INDEXTYPE IS CTXSYS.CONTEXT
PARAMETERS ('datastore TEST_MULTI
filter ctxsys.null_filter
stoplist ctxsys.EMPTY_STOPLIST
sync (every "SYSDATE+10/(24*60)" memory 536870912)')
/
-- Get the definition of the index.
select ctx_report.create_index_script('TEST_MULTI_COL') from dual;
-- Synchronize the index manually.
begin CTX_DDL.SYNC_INDEX('TEST_MULTI_COL'); end;
/
-- NOTE(review): drvdml.auto_sync_index looks like the internal call used by
-- the automatic sync job -- confirm before running it by hand.
begin ctxsys.drvdml.auto_sync_index('"TEST_MULTI_COL"', 536870912, NULL, NULL, NULL, 0); end;
/
begin
ctx_ddl.create_preference('"TEST_MULTI_COL_DST"','MULTI_COLUMN_DATASTORE');
ctx_ddl.set_attribute('"TEST_MULTI_COL_DST"','COLUMNS','CLIENT,ADDRESS');
ctx_ddl.set_attribute('"TEST_MULTI_COL_DST"','DELIMITER','NEWLINE');
end;
/
begin
ctx_ddl.create_preference('"TEST_MULTI_COL_FIL"','NULL_FILTER');
end;
/
begin
ctx_ddl.create_section_group('"TEST_MULTI_COL_SGP"','NULL_SECTION_GROUP');
end;
/
begin
ctx_ddl.create_preference('"TEST_MULTI_COL_LEX"','BASIC_LEXER');
end;
/
begin
ctx_ddl.create_preference('"TEST_MULTI_COL_WDL"','BASIC_WORDLIST');
ctx_ddl.set_attribute('"TEST_MULTI_COL_WDL"','STEMMER','ENGLISH');
ctx_ddl.set_attribute('"TEST_MULTI_COL_WDL"','FUZZY_MATCH','GENERIC');
end;
/
begin
ctx_ddl.create_preference('"TEST_MULTI_COL_STO"','BASIC_STORAGE');
ctx_ddl.set_attribute('"TEST_MULTI_COL_STO"','R_TABLE_CLAUSE','lob (data) store as (cache)');
ctx_ddl.set_attribute('"TEST_MULTI_COL_STO"','I_INDEX_CLAUSE','compress 2');
end;
/
create index "AAA"."TEST_MULTI_COL"
on "AAA"."TEST_MULTI_COLUMN"
("CTX_VALUE")
indextype is ctxsys.context
parameters('
datastore "TEST_MULTI_COL_DST"
filter "TEST_MULTI_COL_FIL"
section group "TEST_MULTI_COL_SGP"
lexer "TEST_MULTI_COL_LEX"
wordlist "TEST_MULTI_COL_WDL"
stoplist "CTXSYS"."EMPTY_STOPLIST"
storage "TEST_MULTI_COL_STO"
fast_dml
sync (every "SYSDATE+10/(24*60)" memory 536870912)
')
/
-- Scratch re-run: inspect the table, drop the old index, then rebuild it with
-- an explicit indexing memory size (536870912 = 512 * 1024 * 1024).
select * from TEST_MULTI_COLUMN;
-- The index name is left unqualified so it resolves in the current schema.
drop index TEST_MULTI_COL;
begin
ctx_ddl.create_preference('TEST_MULTI', 'MULTI_COLUMN_DATASTORE');
ctx_ddl.set_attribute('TEST_MULTI', 'columns', 'CLIENT,ADDRESS');
end;
/
-- No SYNC clause here, so the memory value must not be followed by ")".
CREATE INDEX TEST_MULTI_COL ON TEST_MULTI_COLUMN(CTX_VALUE)
INDEXTYPE IS CTXSYS.CONTEXT
PARAMETERS ('datastore TEST_MULTI
filter ctxsys.null_filter
stoplist ctxsys.EMPTY_STOPLIST
memory 536870912')
/
select ctx_report.create_index_script('TEST_MULTI_COL') from dual;
begin
ctx_ddl.create_preference('"TEST_MULTI_COL_DST"','MULTI_COLUMN_DATASTORE');
ctx_ddl.set_attribute('"TEST_MULTI_COL_DST"','COLUMNS','CLIENT,ADDRESS');
ctx_ddl.set_attribute('"TEST_MULTI_COL_DST"','DELIMITER','NEWLINE');
end;
/
begin
ctx_ddl.create_preference('"TEST_MULTI_COL_FIL"','NULL_FILTER');
end;
/
begin
ctx_ddl.create_section_group('"TEST_MULTI_COL_SGP"','NULL_SECTION_GROUP');
end;
/
begin
ctx_ddl.create_preference('"TEST_MULTI_COL_LEX"','BASIC_LEXER');
end;
/
begin
ctx_ddl.create_preference('"TEST_MULTI_COL_WDL"','BASIC_WORDLIST');
ctx_ddl.set_attribute('"TEST_MULTI_COL_WDL"','STEMMER','ENGLISH');
ctx_ddl.set_attribute('"TEST_MULTI_COL_WDL"','FUZZY_MATCH','GENERIC');
end;
/
begin
ctx_ddl.create_preference('"TEST_MULTI_COL_STO"','BASIC_STORAGE');
ctx_ddl.set_attribute('"TEST_MULTI_COL_STO"','R_TABLE_CLAUSE','lob (data) store as (cache)');
ctx_ddl.set_attribute('"TEST_MULTI_COL_STO"','I_INDEX_CLAUSE','compress 2');
end;
/
create index "TEST_MULTI_COL"
on "TEST_MULTI_COLUMN"
("CTX_VALUE")
indextype is ctxsys.context
parameters('
datastore "TEST_MULTI_COL_DST"
filter "TEST_MULTI_COL_FIL"
section group "TEST_MULTI_COL_SGP"
lexer "TEST_MULTI_COL_LEX"
wordlist "TEST_MULTI_COL_WDL"
stoplist "CTXSYS"."EMPTY_STOPLIST"
storage "TEST_MULTI_COL_STO"
fast_dml
')
/
-- Drop the index and rebuild it with an interval-based SYNC clause
-- (the interval expression is SYSDATE + 10 minutes).
drop index TEST_MULTI_COL
/
CREATE INDEX TEST_MULTI_COL ON TEST_MULTI_COLUMN(CTX_VALUE)
INDEXTYPE IS CTXSYS.CONTEXT
PARAMETERS ('datastore TEST_MULTI
filter ctxsys.null_filter
stoplist ctxsys.EMPTY_STOPLIST
sync (every "SYSDATE+10/(24*60)" memory 536870912)')
/
select ctx_report.create_index_script('TEST_MULTI_COL') from dual;
begin
ctx_ddl.create_preference('"TEST_MULTI_COL_DST"','MULTI_COLUMN_DATASTORE');
ctx_ddl.set_attribute('"TEST_MULTI_COL_DST"','COLUMNS','CLIENT,ADDRESS');
ctx_ddl.set_attribute('"TEST_MULTI_COL_DST"','DELIMITER','NEWLINE');
end;
/
begin
ctx_ddl.create_preference('"TEST_MULTI_COL_FIL"','NULL_FILTER');
end;
/
begin
ctx_ddl.create_section_group('"TEST_MULTI_COL_SGP"','NULL_SECTION_GROUP');
end;
/
begin
ctx_ddl.create_preference('"TEST_MULTI_COL_LEX"','BASIC_LEXER');
end;
/
begin
ctx_ddl.create_preference('"TEST_MULTI_COL_WDL"','BASIC_WORDLIST');
ctx_ddl.set_attribute('"TEST_MULTI_COL_WDL"','STEMMER','ENGLISH');
ctx_ddl.set_attribute('"TEST_MULTI_COL_WDL"','FUZZY_MATCH','GENERIC');
end;
/
begin
ctx_ddl.create_preference('"TEST_MULTI_COL_STO"','BASIC_STORAGE');
ctx_ddl.set_attribute('"TEST_MULTI_COL_STO"','R_TABLE_CLAUSE','lob (data) store as (cache)');
ctx_ddl.set_attribute('"TEST_MULTI_COL_STO"','I_INDEX_CLAUSE','compress 2');
end;
/
create index "AAA"."TEST_MULTI_COL"
on "AAA"."TEST_MULTI_COLUMN"
("CTX_VALUE")
indextype is ctxsys.context
parameters('
datastore "TEST_MULTI_COL_DST"
filter "TEST_MULTI_COL_FIL"
section group "TEST_MULTI_COL_SGP"
lexer "TEST_MULTI_COL_LEX"
wordlist "TEST_MULTI_COL_WDL"
stoplist "CTXSYS"."EMPTY_STOPLIST"
storage "TEST_MULTI_COL_STO"
fast_dml
sync (every "SYSDATE+10/(24*60)" memory 536870912)
')
/
-- Scheduler jobs owned by the current user.
select * from user_scheduler_jobs;
-- A domain index created with default parameters has no SYNC option: it is not
-- synchronized automatically, and a commit does not synchronize it either.
-- To get that, add one of these clauses to the index PARAMETERS string:
--   sync (every "SYSDATE+10/(24*60)" memory 536870912)
--   sync (on commit)
-- The IDX_SYNC_* columns of ctx_indexes show which mode an index uses.
select * from ctxsys.ctx_indexes;
IDX_ID IDX_OWNER IDX_NAME IDX_TABLE_OWNER IDX_TABLE IDX_KEY_NAME IDX_TEXT_NAME IDX_DOCID_COUNT IDX_STATUS IDX_LANGUAGE_COLUMN IDX_FORMAT_COLUMN IDX_CHARSET_COLUMN IDX_TYPE IDX_SYNC_TYPE IDX_SYNC_MEMORY IDX_SYNC_PARA_DEGREE IDX_SYNC_INTERVAL IDX_SYNC_JOBNAME IDX_QUERY_STATS_ENABLED IDX_AUTO_OPT_TYPE IDX_AUTO_OPT_INTERVAL IDX_AUTO_OPT_PARA_DEGREE
1 1754 AAA TEST_MULTI_COL AAA TEST_MULTI_COLUMN CTX_VALUE 2 INDEXED CONTEXT NO
1 1756 AAA TEST_MULTI_COL AAA TEST_MULTI_COLUMN CTX_VALUE 6 INDEXED CONTEXT ON COMMIT 67108864 NO
1 1757 AAA TEST_MULTI_COL AAA TEST_MULTI_COLUMN CTX_VALUE 6 INDEXED CONTEXT AUTOMATIC 536870912 SYSDATE+10/(24*60) DR$TEST_MULTI_COL$J NO
create table TEST_MULTI_COLUMN
(Id Number(7)
,CLIENT varchar2(10 char)
,ADDRESS varchar2(10 char)
,CTX_VALUE varchar2(1))
/
insert into TEST_MULTI_COLUMN
(ID, CLIENT, ADDRESS, CTX_VALUE)
values
(1, 'ORACLE', Null, Null)
/
insert into TEST_MULTI_COLUMN
(ID, CLIENT, ADDRESS, CTX_VALUE)
values
(12, 'ABC', Null, Null)
/
select * from TEST_MULTI_COLUMN t
where contains(t.ctx_value, 'ABC') > 0
/
-- Synchronize the index manually. Each one-line PL/SQL block needs its own
-- "/" line to be executed in SQL*Plus.
begin CTX_DDL.SYNC_INDEX('TEST_MULTI_COL'); end;
/
-- NOTE(review): drvdml.auto_sync_index looks like the internal call used by
-- the automatic sync job -- confirm before running it by hand.
begin ctxsys.drvdml.auto_sync_index('"TEST_MULTI_COL"', 536870912, NULL, NULL, NULL, 0); end;
/
-- Same row as before, but with a value in the indexed placeholder column.
insert into TEST_MULTI_COLUMN
(ID, CLIENT, ADDRESS, CTX_VALUE)
values
(1, 'ORACLE', Null, 'A') -- the 'A' in CTX_VALUE is not needed; the earlier inserts leave it NULL
/
select *From TEST_MULTI_COLUMN where CTX_VALUE='A';
Commit
/
select * from TEST_MULTI_COLUMN;
begin
ctx_ddl.create_preference('TEST_MULTI', 'MULTI_COLUMN_DATASTORE');
ctx_ddl.set_attribute('TEST_MULTI', 'columns', 'CLIENT,ADDRESS');
end;
/
drop index TEST_MULTI_COL
/
-- Variant 1: no SYNC clause, explicit indexing memory.
CREATE INDEX TEST_MULTI_COL ON TEST_MULTI_COLUMN(CTX_VALUE)
INDEXTYPE IS CTXSYS.CONTEXT
PARAMETERS ('datastore TEST_MULTI
filter ctxsys.null_filter
stoplist ctxsys.EMPTY_STOPLIST
memory 200M')
/
-- Variant 2: the same index, synchronized on commit. It reuses the index name,
-- so the variant-1 index has to be dropped first; otherwise this CREATE fails
-- because the name is already in use.
drop index TEST_MULTI_COL
/
CREATE INDEX TEST_MULTI_COL ON TEST_MULTI_COLUMN(CTX_VALUE)
INDEXTYPE IS CTXSYS.CONTEXT
PARAMETERS ('datastore TEST_MULTI
filter ctxsys.null_filter
stoplist ctxsys.EMPTY_STOPLIST
sync (on commit)')
/
select * from TEST_MULTI_COLUMN t
where contains(t.ctx_value, 'oracle') > 0
/
select * from TEST_MULTI_COLUMN t
where contains(t.ctx_value, 'client') > 0
/
select * from TEST_MULTI_COLUMN t
where contains(t.ctx_value, 'address') > 0
/
-- Inspect the internal tables Oracle Text maintains for index TEST_MULTI_COL.
-- They are named DR$<index_name>$<suffix>; the "$" characters are part of the
-- name. $I is the token table queried earlier in this file.
select token_text, token_type from dr$test_multi_col$i;
select * from dr$test_multi_col$k;
select * from dr$test_multi_col$n;
select * from dr$test_multi_col$u;