coreseek sphinx mmseg mysql 全文检索 安装 配置

张映 发表于 2014-08-01

分类目录: mysql

标签:, , , , , , ,

上次搞全文检索的东西,还要追溯到2010年了,当时个人觉得coreseek还不成熟,就用了原始的sphinx,请参考:sphinx mmseg mysql 中文分词。这次呢,决定用一下coreseek,看看有什么区别。

一,安装环境和工具

服务器用的是centos6.5 x86

coreseek 4.0.1下载地址:http://www.coreseek.cn/uploads/csft/4.0/coreseek-4.0.1-beta.tar.gz

mysql 5.5.8下载地址:http://downloads.mysql.com/archives/community/

二,mmseg中文分词安装

1,旧版本安装方法

# tar zxvf coreseek-4.0.1-beta.tar.gz
# cd coreseek-4.0.1-beta/mmseg-3.2.14/
# aclocal
# libtoolize --force
# automake --add-missing
# autoconf
# autoheader
# make clean
# ./configure --prefix=/usr/local/mmseg3
# make
# make install

2,新版本安装方法

# ./bootstrap
# ./configure --prefix=/usr/local/mmseg3
# make
# make install

mmseg-3.2.14,两种方法都可以安装成功,编译时会报warning,但不影响安装。

3,新版本mmseg安装后,分词词典(uni.lib)就已经生成好了,不需要再用mmseg命令去生成了

[root@localhost mmseg-3.2.14]# ll /usr/local/mmseg3/etc/
总用量 5432
-rwxr-xr-x 1 root root 229 7月 31 01:44 mmseg.ini
-rwxr-xr-x 1 root root 1826251 7月 31 01:44 unigram.txt
-rwxr-xr-x 1 root root 3729280 7月 31 01:44 uni.lib

4,mmseg.ini配置的官方说明

[mmseg]
merge_number_and_ascii=0; #合并英文和数字 abc123/x
number_and_ascii_joint=-; #定义可以连接英文和数字的字符
compress_space=1;         #暂不支持
seperate_number_ascii=0; #就是将字母和数字打散

三,csft(sphinx)安装

1,旧版安装

# cp /coreseek的解压目录/mmseg-3.2.14/src/*/*.h /usr/local/mmseg3/include/mmseg/
# cd /home/tank/download/coreseek-4.0.1-beta
# aclocal
# libtoolize --force
# automake --add-missing
# autoconf
# autoheader
# perl -pi -e 's/lpthread/lpthread -liconv/g' src/Makefile*
# make clean
# ./configure --prefix=/usr/local/sphinx --enable-id64 \
--with-mysql=/usr/local/mysql2 --with-mmseg \
--with-mmseg-includes=/usr/local/mmseg3/include/mmseg/ \
--with-mmseg-libs=/usr/local/mmseg3/lib/
# make && make install

2,新版安装

# cd /home/tank/download/coreseek-4.0.1-beta
# ./buildconf.sh
# ./configure --prefix=/usr/local/sphinx --enable-id64 \
--with-mysql=/usr/local/mysql2 --with-mmseg \
--with-mmseg-includes=/usr/local/mmseg3/include/mmseg/ \
--with-mmseg-libs=/usr/local/mmseg3/lib/
# make && make install

coreseek-4.0.1,两种安装方式都是可以的,但是安装时,还是会报错。

四,mysql插件 sphinxse的安装

1,将mysqlse复制到mysql程序目录storage

# cp -R coreseek-4.0.1-beta/csft-4.0.1-beta/mysqlse /home/tank/download/mysql-5.5.8/storage/sphinx

2,cmake安装mysql 5.5.8

# cd /home/tank/download/mysql-5.5.8
# cmake -DCMAKE_INSTALL_PREFIX=/usr/local/mysql2 \
 -DMYSQL_UNIX_ADDR=/usr/local/mysql2/mysql.sock \
 -DCMAKE_BUILD_TYPE=Release -DWITH_SPHINX_STORAGE_ENGINE=1 \
 -DDEFAULT_CHARSET=utf8 \
 -DDEFAULT_COLLATION=utf8_general_ci \
 -DWITH_MYISAM_STORAGE_ENGINE=1 \
 -DWITH_INNOBASE_STORAGE_ENGINE=1 \
 -DWITH_MEMORY_STORAGE_ENGINE=1 \
 -DWITH_READLINE=1 \
 -DENABLED_LOCAL_INFILE=1 \
 -DMYSQL_DATADIR=/usr/local/mysql2/data \
 -DMYSQL_USER=mysql

# make && make install

cmake安装mysql高版本,请参考:linux cmake 安装mysql5.5.11,以及更高版本

3,configure安装mysql5.5.8

# cd /home/tank/download/mysql-5.5.8
# sh BUILD/autorun.sh
# ./configure --prefix=/usr/local/mysql2 \
--with-plugins=partition,innobase,myisam,sphinx \
--with-charset=utf8
# make && make install

4,初始化数据库

# cd /home/tank/download/mysql-5.5.8
# chmod +x scripts/mysql_install_db
# scripts/mysql_install_db --basedir=/usr/local/mysql2 \
--datadir=/usr/local/mysql2/data --user=mysql #初始化数据
# cp support-files/my-medium.cnf /etc/my.cnf #copy配置文件
# chown -R mysql:mysql /usr/local/mysql2 #更改权限 

# vim /etc/my.cnf      #加上以下内容
[mysqld]
basedir = /usr/local/mysql2
datadir = /usr/local/mysql2/data
log-error = /usr/local/mysql2/mysql_error.log
pid-file = /usr/local/mysql2/mysql.pid
user = mysql
tmpdir = /tmp

5,启动并登录mysql

# cd /usr/local/mysql2/support-files/
# cp ./mysql.server /etc/init.d/mysql5
# /etc/init.d/mysql5 start              #启动服务端
# /usr/local/mysql2/bin/mysql           #客户端连接

6,启用sphinxse

sphinxse

sphinxse

这里和以前安装sphinxse有一点不同:以前mysql编译安装好了以后,就会有sphinxse,不用再用root登录去install了。

五,配置sphinx.conf

# cd /usr/local/mysql2/support-files/
# cp ./mysql.server /etc/init.d/mysql5
# /etc/init.d/mysql5 start

# cd /usr/local/sphinx/etc
# cp sphinx.conf.dist sphinx.conf
# vim sphinx.conf 

source src1
{
 type = mysql
 sql_host = localhost
 sql_user = root //用户名改一下
 sql_pass = 111111 //密码改一下,无密码,留空
 sql_db = test
 sql_query_pre = SET NAMES utf8 //这行注释去掉
 。。。。。。。。。。。省略。。。。。。。。。。。。。。
}
。。。。。。。。。。。省略。。。。。。。。。。。。。。
index rt //在index里面添加以下三行,加入中文分词功能
{
 type = rt
 path = /usr/local/sphinx/var/data/rt
 charset_dictpath = /usr/local/mmseg3/etc/ //添加
 charset_type = zh_cn.utf-8 //添加
 ngram_len = 0 //添加
 rt_field = title
 rt_field = content
 rt_attr_uint = gid
}
。。。。。。。。。。。省略。。。。。。。。。。。。。。

六,启动sphinx

//运行indexer,建立索引
[root@localhost etc]# /usr/local/sphinx/bin/indexer --config /usr/local/sphinx/etc/sphinx.conf --all
Coreseek Fulltext 4.0 [ Sphinx 1.11-id64-dev (r2540)]
Copyright (c) 2007-2011,
Beijing Choice Software Technologies Inc (http://www.coreseek.com)

 using config file '/usr/local/sphinx/etc/sphinx.conf'...
indexing index 'test1'...
collected 4 docs, 0.0 MB
sorted 0.0 Mhits, 100.0% done
total 4 docs, 193 bytes
total 0.012 sec, 15540 bytes/sec, 322.08 docs/sec
indexing index 'test1stemmed'...
collected 4 docs, 0.0 MB
sorted 0.0 Mhits, 100.0% done
total 4 docs, 193 bytes
total 0.002 sec, 75097 bytes/sec, 1556.42 docs/sec
skipping non-plain index 'dist1'...
skipping non-plain index 'rt'...
total 6 reads, 0.000 sec, 0.1 kb/call avg, 0.0 msec/call avg
total 18 writes, 0.000 sec, 0.1 kb/call avg, 0.0 msec/call avg

//启动searchd
[root@localhost etc]# /usr/local/sphinx/bin/searchd --config /usr/local/sphinx/etc/sphinx.conf
Coreseek Fulltext 4.0 [ Sphinx 1.11-id64-dev (r2540)]
Copyright (c) 2007-2011,
Beijing Choice Software Technologies Inc (http://www.coreseek.com)

using config file '/usr/local/sphinx/etc/sphinx.conf'...
listening on all interfaces, port=9312
listening on all interfaces, port=9306
 precaching index 'test1'
precaching index 'test1stemmed'
WARNING: multiple addresses found for 'localhost', using the first one (ip=127.0.0.1)
precaching index 'rt'
precached 3 indexes in 0.001 sec


转载请注明
作者:海底苍鹰
地址:http://blog.51yip.com/mysql/1641.html?utm_source=ld246.com

1 条评论

  1. 飯糰 留言

    您好,首先感謝您的分享有關如何架設文章,針對內容我有些問題想提問
    1. 您是否有測試coreseek與mmseg的中文分詞效果與差異
    2. 現在下載不到coreseek,是否還有好的解決方案可以對應有關中文分詞的問題??
    3. 是否有對Windows架設的文章可以參考

    以上打擾,敬請見諒,謝謝!!