Home » 哈工大分词java接口

哈工大分词java接口

package IRdll;

import java.io.File;
import java.io.Reader;
import java.io.FileInputStream;
import java.io.*;
import java.util.Date;
import java.util.HashSet;
/**
 * Java interface to a Chinese word segmenter.
 *
 * <p>Description: this component is built on top of the Harbin Institute of Technology (HIT)
 * word segmentation system. It is provided for study and research purposes only; anyone who
 * uses it commercially bears the legal consequences themselves, and the author of this
 * component accepts no liability.
 *
 * <p>All segmentation work is delegated to native methods from the {@code IRdll} library,
 * which is loaded by this class's static initializer. The class performs no synchronization
 * of its own.
 *
 * <p>Copyright: Copyright (c) 2006
 *
 * <p>Company: Dalian University of Technology
 *
 * @author yezheng
 * @version 1.0
 */
public class IRSplit {

    // The single shared instance, created lazily by getInstance() and cleared by ReleaseSeggers().
    private static IRSplit instance = null;

    // The native methods below live in this library, so it must be loaded before any of them run.
    static {
        System.loadLibrary("IRdll");
    }

    /** Private constructor: loads the segmenter resources and reports progress on stdout. */
    private IRSplit() {
        System.out.println("正在加载词典……");
        this.LoadSegRes();
        System.out.println("加载结束");
    }

    /**
     * Returns the shared instance, creating it (and loading the native resources) on first use
     * or after {@link #ReleaseSeggers()} has cleared it.
     *
     * @return the shared {@code IRSplit} instance
     */
    public static IRSplit getInstance() {
        if (instance == null) {
            instance = new IRSplit();
        }
        return instance;
    }

    // Native methods implemented in the IRdll library.
    // NOTE(review): judging by the names and the constructor's messages, LoadSegRes loads the
    // dictionaries and ReleaseSegger frees them; the exact output format of split() is defined
    // by the native side and is not visible here.
    private native void LoadSegRes();

    private native void ReleaseSegger();

    private native String split(String sentence);

    /**
     * Segments one sentence with a single call to the native segmenter.
     *
     * @param sentence the text to segment; may be {@code null}
     * @return the native segmenter's result, or {@code ""} when {@code sentence} is
     *     {@code null} or empty
     */
    public String splitSentence(String sentence) {
        // Test for null first: calling length() on a null reference would throw.
        if (sentence == null || sentence.length() < 1) {
            return "";
        }
        return split(sentence);
    }

    /**
     * Segments a long text piece by piece. The text is cut after every character whose
     * {@link Character#getType(char)} is {@link Character#OTHER_PUNCTUATION}; each piece
     * (including its closing punctuation mark) is passed to the native segmenter separately
     * and the results are concatenated in order.
     *
     * @param sentence the text to segment; may be {@code null}
     * @return the concatenated segmentation results, or {@code ""} when {@code sentence} is
     *     {@code null} or empty
     */
    public String splitLongSentence(String sentence) {
        if (sentence == null || sentence.length() < 1) {
            return "";
        }
        StringBuilder segmented = new StringBuilder();
        int start = 0;
        for (int i = 0; i < sentence.length(); i++) {
            if (Character.getType(sentence.charAt(i)) == Character.OTHER_PUNCTUATION) {
                int end = i + 1; // keep the punctuation mark with the piece it terminates
                segmented.append(split(sentence.substring(start, end)));
                start = end;
            }
        }
        // Trailing text that is not terminated by a punctuation mark.
        if (start < sentence.length()) {
            segmented.append(split(sentence.substring(start)));
        }
        return segmented.toString();
    }

    /**
     * Clears the shared instance and releases the native segmenter. A later call to
     * {@link #getInstance()} creates a fresh instance.
     */
    public void ReleaseSeggers() {
        instance = null;
        ReleaseSegger();
    }

    /**
     * Segments a text file line by line with {@link #splitLongSentence(String)} and writes the
     * result to {@code outfile}, terminating every line with CR LF. Empty lines are preserved.
     * The whole input is segmented before the output file is opened. Both files use the
     * platform default character encoding.
     *
     * <p>I/O failures are reported on stdout rather than thrown.
     *
     * @param file the file to read
     * @param outfile the file to write (overwritten if it exists)
     */
    public void splitFile(File file, File outfile) {
        try {
            StringBuilder segmented = new StringBuilder();
            BufferedReader br =
                    new BufferedReader(new InputStreamReader(new FileInputStream(file)));
            try {
                String line;
                while ((line = br.readLine()) != null) {
                    if (line.length() != 0) {
                        segmented.append(splitLongSentence(line));
                    }
                    segmented.append("\r\n");
                }
            } finally {
                // Closing the outermost reader also closes the underlying file stream.
                br.close();
            }
            FileWriter writer = new FileWriter(outfile);
            try {
                writer.write(segmented.toString());
            } finally {
                writer.close();
            }
        } catch (FileNotFoundException ex) {
            System.out.println(file.toString() + "File not Found");
        } catch (IOException ex1) {
            System.out.println(file.toString() + "IO errors");
        }
    }

    /**
     * Segments the file at path {@code source} into the file at path {@code destination}.
     * Does nothing when {@code source} is not an existing regular file.
     *
     * @param source path of the file to read
     * @param destination path of the file to write
     */
    public void splitFile(String source, String destination) {
        File file = new File(source);
        File outfile = new File(destination);
        if (file.isFile()) {
            splitFile(file, outfile);
        }
    }

    /**
     * Reads all lines from {@code reader}, segments each non-empty line with
     * {@link #splitSentence(String)}, and returns a new reader over the result, in which every
     * line is terminated with CR LF. The supplied reader is not closed.
     *
     * @param reader the source of the text to segment
     * @return a reader over the segmented text; if reading fails, the error is reported on
     *     stdout and the supplied {@code reader} itself is returned (possibly partly consumed)
     */
    public Reader splitFile(Reader reader) {
        BufferedReader br = new BufferedReader(reader);
        StringBuilder segmented = new StringBuilder();
        try {
            String line;
            while ((line = br.readLine()) != null) {
                if (line.length() != 0) {
                    segmented.append(splitSentence(line));
                }
                segmented.append("\r\n");
            }
            reader = new StringReader(segmented.toString());
        } catch (IOException ex) {
            // Report instead of silently swallowing; the return value stays as before.
            System.out.println("IO errors: " + ex);
        }
        return reader;
    }

    /**
     * Segments every file under {@code sourceDir}, recursing into subdirectories, and writes
     * the results under {@code destinationDir} using the same relative names. Missing
     * destination directories are created.
     *
     * @param sourceDir the directory to read from
     * @param destinationDir the directory to write to
     */
    public void splitFiles(String sourceDir, String destinationDir) {
        File directory = new File(sourceDir);
        File[] files = directory.listFiles();
        if (files == null) {
            // listFiles() returns null when the path is not a directory or cannot be read.
            System.out.println(sourceDir + " is not a readable directory");
            return;
        }
        File dirdes = new File(destinationDir);
        if (!dirdes.isDirectory()) {
            dirdes.mkdirs();
        }
        for (int i = 0; i < files.length; i++) {
            File target = new File(dirdes, files[i].getName());
            if (files[i].isFile()) {
                splitFile(files[i], target);
            } else if (files[i].isDirectory()) {
                // The recursive call creates the matching destination subdirectory.
                splitFiles(files[i].getPath(), target.getAbsolutePath());
            }
        }
    }

    /**
     * Small timing demo: segments {@code args[0]} (or a built-in sample sentence when no
     * argument is given), then prints the result, the start and end times, and the elapsed
     * milliseconds.
     *
     * @param args optional; the first element is the text to segment
     */
    public static void main(String[] args) {
        String ss = args.length > 0 ? args[0] : "这是一个中文分词接口的测试。";
        IRSplit split = IRSplit.getInstance(); // this is how other classes obtain the segmenter
        Date startdate = new Date();
        String result = split.splitLongSentence(ss);
        Date enddate = new Date();
        System.out.println(result);
        System.out.println(startdate);
        System.out.println(enddate);
        System.out.println(enddate.getTime() - startdate.getTime());
    }
}

0 Comments:

Popular Posts