用英文写的,主要是命令和代码,就不翻译了,偷懒一下。
This article show you how to create a parser to handle camel case [3] in
string in Postgresql full text search. This parser is tested on Postgresql 8.3.9
version. The OS is ubuntu 8.10.
[1,2] will give you basic background and sample for old version. I update the
code to fit 8.3.9 version, especial lots of changes in the SQL statement.
Create parser
First, create a directory for put all files, for example: /home/user/Desktop/test_parser
Create file "test_parser.c". I use vim.
#include "postgres.h"
#include "utils/builtins.h"
#ifdef PG_MODULE_MAGIC
PG_MODULE_MAGIC;
#endif
/*
* types
*/
/* self-defined type */
typedef struct {
char * buffer; /* text to parse */
int len; /* length of the text in buffer */
int pos; /* position of the parser */
} ParserState;
/* copy-paste from wparser.h of tsearch2 */
typedef struct {
int lexid;
char *alias;
char *descr;
} LexDescr;
/*
* prototypes
*/
PG_FUNCTION_INFO_V1(testprs_start);
Datum testprs_start(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(testprs_getlexeme);
Datum testprs_getlexeme(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(testprs_end);
Datum testprs_end(PG_FUNCTION_ARGS);
PG_FUNCTION_INFO_V1(testprs_lextype);
Datum testprs_lextype(PG_FUNCTION_ARGS);
/*
* functions
*/
Datum testprs_start
(PG_FUNCTION_ARGS)
{
ParserState *pst = (ParserState *) palloc(sizeof(ParserState));
pst->buffer = (char *) PG_GETARG_POINTER(0);
pst->len = PG_GETARG_INT32(1);
pst->pos = 0;
PG_RETURN_POINTER(pst);
}
Datum testprs_getlexeme
(PG_FUNCTION_ARGS)
{
ParserState *pst = (ParserState *) PG_GETARG_POINTER(0);
char **t = (char **) PG_GETARG_POINTER(1);
int *tlen = (int *) PG_GETARG_POINTER(2);
int type;
*tlen = pst->pos;
*t = pst->buffer + pst->pos;
HTML clipboard/* main process here */
if (((pst->buffer)[pst->pos] >= 'A' && (pst->buffer)[pst->pos] <= 'Z') &&
(pst->pos < pst->len)) {
/* word type */
type = 3;
(pst->pos)++;
/* for case like: ItemURL => Item URL*/
if(((pst->buffer)[pst->pos] >= 'A' && (pst->buffer)[pst->pos] <= 'Z') && (pst->pos
< pst->len)){
while (((pst->buffer)[pst->pos] >= 'A' && (pst->buffer)[pst->pos] <= 'Z')
&& (pst->pos < pst->len)) {
(pst->pos)++;
}
}
/* go to the next upper case character */
while (((pst->buffer)[pst->pos] >= 'a' && (pst->buffer)[pst->pos] <= 'z')
&& (pst->pos < pst->len)) {
(pst->pos)++;
}
}
if (((pst->buffer)[pst->pos] >= 'a' && (pst->buffer)[pst->pos] <= 'z') &&
(pst->pos < pst->len)) {
/* word type */
type = 3;
(pst->pos)++;
/* go to the next upper case character */
while (((pst->buffer)[pst->pos] >= 'a' && (pst->buffer)[pst->pos] <= 'z')
&& (pst->pos < pst->len)) {
(pst->pos)++;
}
}
*tlen = pst->pos - *tlen;
/* we are finished if (*tlen == 0) */
if (*tlen == 0) type=0;
PG_RETURN_INT32(type);
}
Datum testprs_end
(PG_FUNCTION_ARGS)
{
ParserState *pst = (ParserState *) PG_GETARG_POINTER(0);
pfree(pst);
PG_RETURN_VOID();
}
Datum testprs_lextype
(PG_FUNCTION_ARGS)
{
/*
Remarks:
- we have to return the blanks for headline reason
- we use the same lexids like Teodor in the default
word parser; in this way we can reuse the headline
function of the default word parser.
*/
LexDescr *descr = (LexDescr *) palloc(sizeof(LexDescr) * (2+1));
/* there are only two types in this parser */
descr[0].lexid = 3;
descr[0].alias = pstrdup("word");
descr[0].descr = pstrdup("Word");
descr[1].lexid = 12;
descr[1].alias = pstrdup("blank");
descr[1].descr = pstrdup("Space symbols");
descr[2].lexid = 0;
PG_RETURN_POINTER(descr);
}
|
Create "Makefile" file under the same directory.
override CPPFLAGS := -I. $(CPPFLAGS)
MODULE_big = test_parser
OBJS = test_parser.o
DATA_built = test_parser.sql
DATA =
REGRESS = test_parser
ifdef USE_PGXS
PGXS := $(shell pg_config --pgxs)
include $(PGXS)
else
subdir = contrib/test_parser
top_builddir = ../..
include $(top_builddir)/src/Makefile.global
include $(top_srcdir)/contrib/contrib-global.mk
endif
|
Create "test_parser.sql" file under the same directory.
SET search_path = public;
BEGIN;
--DROP TEXT SEARCH CONFIGURATION testcfg;
--DROP TEXT SEARCH PARSER testparser;
CREATE OR REPLACE FUNCTION testprs_start(internal,int4)
RETURNS internal
AS '$libdir/test_parser'
LANGUAGE 'C';
CREATE OR REPLACE FUNCTION testprs_getlexeme(internal,internal,internal)
RETURNS internal
AS '$libdir/test_parser'
LANGUAGE 'C';
CREATE OR REPLACE FUNCTION testprs_end(internal)
RETURNS void
AS '$libdir/test_parser'
LANGUAGE 'C';
CREATE OR REPLACE FUNCTION testprs_lextype(internal)
RETURNS internal
AS '$libdir/test_parser'
LANGUAGE 'C';
CREATE TEXT SEARCH PARSER testparser(
START = 'testprs_start',
GETTOKEN = 'testprs_getlexeme',
END = 'testprs_end',
LEXTYPES = 'testprs_lextype'
);
CREATE TEXT SEARCH CONFIGURATION testcfg (
PARSER ='testparser'
);
ALTER TEXT SEARCH CONFIGURATION testcfg ADD MAPPING FOR word WITH
english_stem;
END;
|
Note: the bold part is total changed for the 8.3.9
version.
"DROP TEXT SEARCH
"
is very useful if you make modification and load the script again.
Install the parser:
- open a terminal, switch to the directory you just create. For example:
cd /home/user/Desktop/test_parser
- type: sudo PATH=/usr/local/pgsql/bin:$PATH USE_PGXS=1 make install
- switch to user "prostgres", type: /usr/local/pgsql/bin/psql
-f /usr/local/pgsql/share/contrib/test_parser.sql your-database
SET
BEGIN
CREATE FUNCTION
CREATE FUNCTION
CREATE FUNCTION
CREATE FUNCTION
DROP TEXT SEARCH CONFIGURATION
DROP TEXT SEARCH PARSER
CREATE TEXT SEARCH PARSER
CREATE TEXT SEARCH CONFIGURATION
ALTER TEXT SEARCH CONFIGURATION
COMMIT
|
- type: /usr/local/pgsql/bin/psql postgres
- test the parser:
postgres=# SELECT
to_tsvector('testcfg','itemValue');
to_tsvector
--------------------
'item':1 'value':2
(1 row)
postgres=# SELECT to_tsvector('testcfg','ItemValue');
to_tsvector
--------------------
'item':1 'value':2
(1 row)
postgres=# SELECT to_tsquery('testcfg','itemsValue');
to_tsquery
-------------------
'items' & 'value'
(1 row)
|
Reference:
[1]
http://www.sai.msu.su/~megera/postgres/gist/tsearch/V2/docs/HOWTO-parser-tsearch2.html
[2]
http://www.pgcon.org/2007/schedule/attachments/12-fts.pdf
[3]
http://en.wikipedia.org/wiki/CamelCase
[4]
http://www.sai.msu.su/~megera/postgres/fts/doc/sql-fts-createmap.html
[5]
http://www.postgresql.org/docs/8.3/static/textsearch-configuration.html
[6]
http://developer.postgresql.org/pgdocs/postgres/sql-createtsconfig.html
[7]
http://www.postgresql.org/docs/7.4/interactive/sql-createfunction.html
分享到:
相关推荐
Python编写PostgreSQL数据库结构比对程序源代码,该源代码导入PyCharm即可运行使用。
PostgreSQL数据库内核分析PostgreSQL数据库内核分析PostgreSQL数据库内核分析PostgreSQL数据库内核分析PostgreSQL数据库内核分析PostgreSQL数据库内核分析PostgreSQL数据库内核分析PostgreSQL数据库内核分析...
C# 操作PostgreSQL 数据库 .
《PostgreSQL数据库内核分析》彭智勇 完整版44M
asp连接postgresql数据库 的源码
postgresql数据库jdbc驱动,jar包。。。。。。。。。。。。
postGREsql数据库资料,经典资料,带完整书签。可以复制文字
1.nacos服务,适配postgresql数据库。 2.提供nacos,postgresql的创建nacos数据库脚本。 3.nacos/conf/nacos-pg.sql数据库脚本文件。 4.nacos版本1.4.2。
如何恢复PostgreSQL数据库. 如何恢复PostgreSQL数据库
postgresql数据库定时备份脚本(linux),亲测可用总结
PostgreSQL数据库内核分析完整版。 作者简介: 彭智勇:武汉大学计算机学院教授,博士生导师,日本京都大学博士,武汉大学珞珈特聘教授,中国计算机学会数据库专业委员会委员,长期从事数据库理论与技术的研究和教学...
postgresQL数据库备份脚本,添加到任务计划中就可以备份数据库文件了。
Window 下的PostgreSQL 数据库备份和恢复工具[GUI],供大家一起共同分享使用。
postgresql数据库备份和恢复文档资料,详细介绍了PostgreSQL数据库的备份与恢复 方法,值得学习下