for merge
This commit is contained in:
parent
fbfdcb08c9
commit
a0bbd87f99
|
@ -110,8 +110,10 @@
|
|||
/gather_program/.settings/
|
||||
/gather_program/.classpath
|
||||
|
||||
#/crawler/dailyScheduledCrawler/fetch_networks/target/
|
||||
#/crawler/dailyScheduledCrawler/fetch_networks/log/*
|
||||
#/crawler/daily_scheduler/log/*
|
||||
#/crawler/moreSmarterCrawler/fetch_networks/target/
|
||||
#/crawler/moreSmarterCrawler/fetch_networks/log/*
|
||||
|
||||
/crawler/dailyScheduledCrawler/fetch_networks/target/
|
||||
/crawler/dailyScheduledCrawler/fetch_networks/log/*
|
||||
/crawler/daily_scheduler/log/*
|
||||
/crawler/daily_scheduler/log_mem/*
|
||||
/crawler/moreSmarterCrawler/fetch_networks/target/
|
||||
/crawler/moreSmarterCrawler/fetch_networks/log/*
|
||||
|
|
|
@ -16,4 +16,4 @@ JVM_ARGS="-Xmn98m -Xmx512m -Xms512m -XX:NewRatio=4 -XX:SurvivorRatio=4 -XX:MaxTe
|
|||
#echo JVM_ARGS=$JVM_ARGS
|
||||
#ulimit -n 400000
|
||||
#echo "" > nohup.out
|
||||
java $JVM_ARGS -classpath $CLASSPATH org.ossean.gather.process.GatherProcess gather_projects >>log/gather_projects.log 2>&1 &
|
||||
java $JVM_ARGS -classpath $CLASSPATH org.ossean.gather.process.GatherProcess gather_projects > log/gather_projects.log 2>&1 &
|
|
@ -17,4 +17,4 @@ JVM_ARGS="-Xmn98m -Xmx512m -Xms512m -XX:NewRatio=4 -XX:SurvivorRatio=4 -XX:MaxTe
|
|||
#echo JVM_ARGS=$JVM_ARGS
|
||||
#ulimit -n 400000
|
||||
#echo "" > nohup.out
|
||||
java $JVM_ARGS -classpath $CLASSPATH org.ossean.gather.process.GatherProcess job_requirements >>log/job_requirements.log 2>&1 &
|
||||
java $JVM_ARGS -classpath $CLASSPATH org.ossean.gather.process.GatherProcess job_requirements > log/job_requirements.log 2>&1 &
|
|
@ -17,4 +17,4 @@ JVM_ARGS="-Xmn98m -Xmx2048m -Xms1024m -XX:NewRatio=4 -XX:SurvivorRatio=4 -XX:Max
|
|||
#echo JVM_ARGS=$JVM_ARGS
|
||||
#ulimit -n 400000
|
||||
#echo "" > nohup.out
|
||||
java $JVM_ARGS -classpath $CLASSPATH org.ossean.gather.process.GatherProcess relative_memos >>log/relative_memos.log 2>&1 &
|
||||
java $JVM_ARGS -classpath $CLASSPATH org.ossean.gather.process.GatherProcess relative_memos > log/relative_memos.log 2>&1 &
|
|
@ -39,9 +39,9 @@
|
|||
destroy-method="close">
|
||||
<property name="driverClassName" value="com.mysql.jdbc.Driver" />
|
||||
<property name="url"
|
||||
value="jdbc:mysql://172.16.128.30:3306/ossean_production?characterEncoding=UTF-8&zeroDateTimeBehavior=convertToNull&autoReconnect=true" />
|
||||
value="jdbc:mysql://172.16.128.36:3306/ossean_production?characterEncoding=UTF-8&zeroDateTimeBehavior=convertToNull&autoReconnect=true" />
|
||||
<property name="username" value="gather" />
|
||||
<property name="password" value="influx1234" />
|
||||
<property name="password" value="Influx@1234" />
|
||||
<property name="validationQuery" value="SELECT 1" />
|
||||
<property name="testOnBorrow" value="true"/>
|
||||
</bean>
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
<properties>
|
||||
<comment>TableFlow</comment>
|
||||
<entry key="pointerTableName">pointers</entry>
|
||||
<entry key="sourceTableName">oschina_question,slashdot,iteye_blog,51cto_blog,csdn_question,cnblog_news,cnblog_question,csdn_bbs,csdn_blogs,dewen_question,stackoverflow</entry>
|
||||
<entry key="sourceTableName">oschina_question,iteye_blog,51cto_blog,csdn_question,cnblog_news,cnblog_question,csdn_bbs,csdn_blogs,dewen_question,stackoverflow</entry>
|
||||
<entry key="targetTableName">relative_memos</entry>
|
||||
<entry key="sourceFields">id,title,content,created_time,now(),type,tags,source,url,url_md5,author,author_url,view_num,review_num,extracted_time</entry>
|
||||
<entry key="targetFields">id,title,content,created_time,updated_time,memo_type,tags,source,url,url_md5,author,author_url,view_num,review_num,extracted_time</entry>
|
||||
|
|
|
@ -0,0 +1,29 @@
|
|||
/*
|
||||
Navicat MySQL Data Transfer
|
||||
|
||||
Source Server : ossean
|
||||
Source Server Version : 50535
|
||||
Source Host : 127.0.0.1:3306
|
||||
Source Database : ossean_new
|
||||
|
||||
Target Server Type : MYSQL
|
||||
Target Server Version : 50535
|
||||
File Encoding : 65001
|
||||
|
||||
Date: 2016-11-15 20:04:15
|
||||
*/
|
||||
|
||||
SET FOREIGN_KEY_CHECKS=0;
|
||||
|
||||
-- ----------------------------
|
||||
-- Table structure for settings
|
||||
-- ----------------------------
|
||||
-- Recreates the `settings` table from scratch: any existing `settings` table
-- (and its rows) is dropped first.
-- Columns: auto-increment `id` primary key, `name` (non-null, defaults to ''),
-- free-text `value`, and a nullable `updated_on` timestamp.
-- `index_settings_on_name` is a plain (non-unique) index, so the schema itself
-- does not prevent two rows with the same `name`.
DROP TABLE IF EXISTS `settings`;
|
||||
CREATE TABLE `settings` (
|
||||
`id` int(11) NOT NULL AUTO_INCREMENT,
|
||||
`name` varchar(255) NOT NULL DEFAULT '',
|
||||
`value` text,
|
||||
`updated_on` datetime DEFAULT NULL,
|
||||
PRIMARY KEY (`id`),
|
||||
KEY `index_settings_on_name` (`name`) USING BTREE
|
||||
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
|
|
@ -0,0 +1,36 @@
|
|||
/*
|
||||
Navicat MySQL Data Transfer
|
||||
|
||||
Source Server : ossean
|
||||
Source Server Version : 50535
|
||||
Source Host : 127.0.0.1:3306
|
||||
Source Database : ossean_production
|
||||
|
||||
Target Server Type : MYSQL
|
||||
Target Server Version : 50535
|
||||
File Encoding : 65001
|
||||
|
||||
Date: 2016-11-13 22:21:05
|
||||
*/
|
||||
|
||||
SET FOREIGN_KEY_CHECKS=0;
|
||||
|
||||
-- ----------------------------
|
||||
-- Table structure for taggings
|
||||
-- ----------------------------
|
||||
-- Recreates the `taggings` table from scratch: any existing `taggings` table
-- (and its rows) is dropped first.
-- Each row links a tag (`tag_id`) to a tagged record identified by
-- (`taggable_id`, `taggable_type`); the unique key below allows at most one
-- row per (tag_id, taggable_id, taggable_type) combination.
-- Both `created_at` and `created_time` exist as separate nullable columns.
-- AUTO_INCREMENT=19315557 is the counter value recorded in this dump, so newly
-- inserted rows start from that id.
DROP TABLE IF EXISTS `taggings`;
|
||||
CREATE TABLE `taggings` (
|
||||
`id` int(11) NOT NULL AUTO_INCREMENT,
|
||||
`tag_id` int(11) NOT NULL,
|
||||
`taggable_id` int(11) NOT NULL,
|
||||
`taggable_type` varchar(255) NOT NULL,
|
||||
`tagger_id` int(11) DEFAULT NULL,
|
||||
`tagger_type` varchar(255) DEFAULT NULL,
|
||||
`context` varchar(128) DEFAULT NULL,
|
||||
`created_at` datetime DEFAULT NULL,
|
||||
`created_time` datetime DEFAULT NULL,
|
||||
`disagree_num` int(11) DEFAULT '0',
|
||||
`tag_source` varchar(255) DEFAULT NULL,
|
||||
PRIMARY KEY (`id`),
|
||||
|
|
||||
UNIQUE KEY `index_taggings_on_tag_id_and_taggable_id_and_taggable_type` (`tag_id`,`taggable_id`,`taggable_type`) USING BTREE
|
||||
) ENGINE=InnoDB AUTO_INCREMENT=19315557 DEFAULT CHARSET=utf8;
|
|
@ -1,363 +1,363 @@
|
|||
package org.ossean.gather.process;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import javax.annotation.Resource;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.ossean.gather.model.Configure;
|
||||
import org.ossean.gather.model.GatherProject;
|
||||
import org.ossean.gather.model.JobRequirement;
|
||||
import org.ossean.gather.model.PKControlPosts;
|
||||
import org.ossean.gather.model.PKControlProjects;
|
||||
import org.ossean.gather.model.RelativeMemo;
|
||||
import org.ossean.gather.model.Taggings;
|
||||
import org.ossean.gather.sourceDao.GatherDao;
|
||||
import org.ossean.gather.sourceDao.PKControlPostsDao;
|
||||
import org.ossean.gather.sourceDao.PKControlProjectsDao;
|
||||
import org.ossean.gather.targetDao.PointerDao;
|
||||
import org.ossean.gather.targetDao.TargetDao;
|
||||
import org.springframework.context.annotation.Scope;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
@Component("gatherThread")
|
||||
@Scope("prototype")
|
||||
public class GatherThread implements Runnable {
|
||||
private static Logger logger = Logger.getLogger(GatherThread.class);
|
||||
private Configure conf;
|
||||
|
||||
@Resource
|
||||
private GatherDao gatherDao;
|
||||
@Resource
|
||||
private PointerDao pointerDao;
|
||||
@Resource
|
||||
private PKControlPostsDao pkControlPostsDao;
|
||||
@Resource
|
||||
private TargetDao targetDao;
|
||||
@Resource
|
||||
private PKControlProjectsDao pkControlProjectsDao;
|
||||
|
||||
private int idsBegin; // 转移开始Id值
|
||||
private int idsEnd; // 转移结束Id值
|
||||
private int idsIncrement;// 每次转移的Id量
|
||||
|
||||
private int beginId;
|
||||
private int endId;
|
||||
|
||||
private String sourceTableName;
|
||||
private String pkControlPostsTableName = "pk_control_posts";
|
||||
private String pkControlProjectsTableName = "pk_control_projects";
|
||||
private String taggingsTableName = "taggings";
|
||||
private String tagsTableName = "tags";
|
||||
|
||||
private String gatherPostsTableName = "relative_memos";
|
||||
private int maxId;
|
||||
|
||||
public void setParameters(Configure conf, String sourceTableName) {
|
||||
this.conf = conf;
|
||||
this.sourceTableName = sourceTableName;
|
||||
}
|
||||
|
||||
// 读指针
|
||||
public int readPointer(String table, String source, String target) {
|
||||
int pointer = 1;
|
||||
try {
|
||||
pointer = pointerDao.getPointer(table, source, target);
|
||||
} catch (Exception e) {
|
||||
// 表示表中没有数据
|
||||
logger.info("No such pointer! Create one");
|
||||
pointerDao.insertPointer(table, source, target, 1);
|
||||
}
|
||||
return pointer;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void run() {
|
||||
// long start = System.currentTimeMillis();
|
||||
Thread.currentThread().setName(sourceTableName);
|
||||
idsIncrement = conf.getIdsIncrement();
|
||||
idsBegin = readPointer(conf.getPointerTableName(), sourceTableName,
|
||||
conf.getTargetTableName());
|
||||
idsEnd = maxId = gatherDao.getMaxId(sourceTableName);
|
||||
while (idsBegin < idsEnd) {
|
||||
beginId = idsBegin;
|
||||
endId = beginId + idsIncrement - 1; // 取数据时两边都取等号
|
||||
if (endId <= maxId) {
|
||||
handleBatchData(beginId, endId, conf);
|
||||
idsBegin = idsBegin + idsIncrement;
|
||||
} else {
|
||||
endId = maxId; // endId应小于maxId
|
||||
handleBatchData(beginId, endId, conf);
|
||||
break;
|
||||
}
|
||||
}
|
||||
GatherProcess.gatherState.put(sourceTableName, false);
|
||||
// long end = System.currentTimeMillis();
|
||||
// logger.info((end - start) / 6000);
|
||||
}
|
||||
|
||||
public void handleBatchData(int beginId, int endId, Configure conf) {
|
||||
logger.info("BeginId#" + sourceTableName + ":" + beginId);
|
||||
// 表示任务没有完成
|
||||
int maxId = gatherDao.getMaxId(sourceTableName);
|
||||
// 防止转移超过当前最大值的Id数据
|
||||
if (beginId >= 0 && endId > 0 && maxId >= endId) {
|
||||
// 更新执行开始时间
|
||||
logger.info("begin gathering...");
|
||||
// 插入Id段数据,忽略重复值
|
||||
try {
|
||||
String[] sourceFields = conf.getSourceFields().split(",");
|
||||
String[] targetFields = conf.getTargetFields().split(",");
|
||||
String selectItems = "";
|
||||
for (int i = 0; i < sourceFields.length; i++) {
|
||||
String str_source = sourceFields[i];
|
||||
String str_target = targetFields[i];
|
||||
selectItems += str_source + " as " + str_target + ",";
|
||||
}
|
||||
selectItems = selectItems
|
||||
.substring(0, selectItems.length() - 1) + " ";
|
||||
if (conf.getTargetTableName().equals("relative_memos")) {
|
||||
List<RelativeMemo> dataGet = gatherDao.getPostGatherData(
|
||||
sourceTableName, selectItems, beginId, endId,
|
||||
conf.getAndWhere());
|
||||
for (int i = 0; i < dataGet.size(); i++) {
|
||||
RelativeMemo model = dataGet.get(i);
|
||||
String urlMD5 = model.getUrl_md5();// 通过urlMD5判断是不是已经存在该帖子
|
||||
// 是否更新
|
||||
int postId = 0;
|
||||
if(GatherProcess.urlMd5Set.contains(urlMD5)){
|
||||
//urlmd5存在则更新
|
||||
RelativeMemo samePost = targetDao.findPostByUrlMD5(
|
||||
conf.getTargetTableName(), urlMD5);
|
||||
// update gather_projects表中对应的记录,在维持待更新表
|
||||
postId = samePost.getId();
|
||||
model.setId(postId);
|
||||
handleUpdateGatherPosts(samePost.getId(), model);
|
||||
}else{
|
||||
// 不存在 插入
|
||||
PKControlPosts pkControlModel = pkControlPostsDao
|
||||
.selectItemByUrlMD5(
|
||||
pkControlPostsTableName, urlMD5);// 查看有没有固定的id
|
||||
if (pkControlModel != null)
|
||||
model.setId(pkControlModel.getId());
|
||||
else {
|
||||
// 在pk_control_posts表中生成当前项目对应的id
|
||||
pkControlPostsDao.insertOneItem(
|
||||
pkControlPostsTableName, urlMD5);
|
||||
// 查看刚刚插入信息的id
|
||||
PKControlPosts controlItem = pkControlPostsDao
|
||||
.selectItemByUrlMD5(
|
||||
pkControlPostsTableName, urlMD5);
|
||||
// 用id构造model对应的固定不变的id
|
||||
model.setId(controlItem.getId());
|
||||
postId = model.getId();
|
||||
}
|
||||
handleInsertGatherPosts(model, conf);
|
||||
GatherProcess.urlMd5Set.add(urlMD5);
|
||||
}
|
||||
|
||||
// 将tag和项目的关系存入表item_tag_relation 并分离tag
|
||||
String tags = model.getTags();
|
||||
if (tags == null) {
|
||||
// 表示该项目没有标签
|
||||
continue;
|
||||
}
|
||||
List<String> tagList = DataHandler
|
||||
.tagsSegmentation(tags);
|
||||
for (String tag : tagList) {
|
||||
targetDao.insertTag(tagsTableName, tag);// ignore方式插入该项目的标签
|
||||
int tag_id = targetDao.selectTagIdByName(
|
||||
tagsTableName, tag);
|
||||
Taggings taggings = new Taggings();
|
||||
taggings.setTag_id(tag_id);
|
||||
taggings.setTaggable_id(postId);
|
||||
taggings.setTaggable_type("RelativeMemo");
|
||||
taggings.setContext("tags");
|
||||
taggings.setCreated_at(DataHandler.getNow());
|
||||
// 将Taggings对象存入数据库中
|
||||
try {
|
||||
targetDao.insertTaggings(taggingsTableName,
|
||||
taggings);
|
||||
} catch (Exception e) {
|
||||
// 在插入记录之前 relative_memos表中的记录已经被删除掉了
|
||||
logger.error(e);
|
||||
System.exit(0);
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if (conf.getTargetTableName().equals("gather_projects")) {
|
||||
List<GatherProject> dataGet = gatherDao.getPrjGatherData(
|
||||
sourceTableName, selectItems, beginId, endId,
|
||||
conf.getAndWhere());
|
||||
for (int i = 0; i < dataGet.size(); i++) {
|
||||
GatherProject model = dataGet.get(i);
|
||||
String urlMD5 = model.getUrl_md5();// 通过urlMD5判断是不是已经存在该项目
|
||||
// 是否更新
|
||||
int prjId = 0;
|
||||
if(GatherProcess.urlMd5Set.contains(urlMD5)){
|
||||
GatherProject samePrj = targetDao.findPrjByUrlMD5(
|
||||
conf.getTargetTableName(), urlMD5);
|
||||
// update gather_projects表中对应的记录,在维持待更新表
|
||||
prjId = samePrj.getId();
|
||||
model.setId(prjId);
|
||||
model.setUpdate_mark(1);
|
||||
handleUpdateGatherProjects(samePrj.getId(), model);
|
||||
}else{
|
||||
// 不存在 插入
|
||||
PKControlProjects pkControlProjects = pkControlProjectsDao
|
||||
.selectItemByUrlMD5(
|
||||
pkControlProjectsTableName, urlMD5);// 查看有没有固定的id
|
||||
if (pkControlProjects != null)
|
||||
model.setId(pkControlProjects.getId());
|
||||
else {
|
||||
// 在pk_control_posts表中生成当前项目对应的id
|
||||
pkControlProjectsDao.insertOneItem(
|
||||
pkControlProjectsTableName, urlMD5);
|
||||
// 查看刚刚插入信息的id
|
||||
PKControlProjects controlItem = pkControlProjectsDao
|
||||
.selectItemByUrlMD5(
|
||||
pkControlProjectsTableName,
|
||||
urlMD5);
|
||||
// 用id构造model对应的固定不变的id
|
||||
model.setId(controlItem.getId());
|
||||
prjId = model.getId();
|
||||
}
|
||||
model.setUpdate_mark(0);
|
||||
handleInsertGatherProjects(model, conf);
|
||||
GatherProcess.urlMd5Set.add(urlMD5);
|
||||
}
|
||||
|
||||
// // 将tag和项目的关系存入表item_tag_relation 并分离tag
|
||||
// String tags = model.getTags();
|
||||
// if (tags == null) {
|
||||
// // 表示该项目没有标签
|
||||
// continue;
|
||||
// }
|
||||
// List<String> tagList = DataHandler
|
||||
// .tagsSegmentation(tags);
|
||||
// for (String tag : tagList) {
|
||||
// targetDao.insertTag(tagsTableName, tag);// ignore方式插入该项目的标签
|
||||
// int tag_id = targetDao.selectTagIdByName(
|
||||
// tagsTableName, tag);
|
||||
// Taggings taggings = new Taggings();
|
||||
// taggings.setTag_id(tag_id);
|
||||
// taggings.setTaggable_id(prjId);
|
||||
// taggings.setTaggable_type("OpenSourceProject");
|
||||
// taggings.setContext("tags");
|
||||
// taggings.setCreated_at(DataHandler.getNow());
|
||||
// // 将Taggings对象存入数据库中
|
||||
// try {
|
||||
// targetDao.insertTaggings(taggingsTableName,
|
||||
// taggings);
|
||||
// } catch (Exception e) {
|
||||
// // 在插入记录之前 relative_memos表中的记录已经被删除掉了
|
||||
// logger.error(e);
|
||||
// System.exit(0);
|
||||
// }
|
||||
// }
|
||||
}
|
||||
} else {
|
||||
List<JobRequirement> dataGet = gatherDao.getJobGatherData(
|
||||
sourceTableName, selectItems, beginId, endId,
|
||||
conf.getAndWhere());
|
||||
for (int i = 0; i < dataGet.size(); i++) {
|
||||
JobRequirement model = dataGet.get(i);
|
||||
String urlMD5 = model.getUrl_md5();// 通过urlMD5判断是不是已经存在该帖子
|
||||
// 是否更新
|
||||
int postId = 0;
|
||||
if(GatherProcess.urlMd5Set.contains(urlMD5)){
|
||||
JobRequirement sameJob = targetDao.findJobByUrlMD5(
|
||||
conf.getTargetTableName(), urlMD5);
|
||||
// update gather_projects表中对应的记录,在维持待更新表
|
||||
postId = sameJob.getId();
|
||||
model.setId(postId);
|
||||
handleUpdateGatherJobs(sameJob.getId(), model);
|
||||
}else{
|
||||
// 不存在 插入
|
||||
PKControlPosts pkControlPosts = pkControlPostsDao
|
||||
.selectItemByUrlMD5(
|
||||
pkControlPostsTableName, urlMD5);// 查看有没有固定的id
|
||||
if (pkControlPosts != null)
|
||||
model.setId(pkControlPosts.getId());
|
||||
else {
|
||||
// 在pk_control_posts表中生成当前项目对应的id
|
||||
pkControlPostsDao.insertOneItem(
|
||||
pkControlPostsTableName, urlMD5);
|
||||
// 查看刚刚插入信息的id
|
||||
PKControlPosts controlItem = pkControlPostsDao
|
||||
.selectItemByUrlMD5(
|
||||
pkControlPostsTableName, urlMD5);
|
||||
// 用id构造model对应的固定不变的id
|
||||
model.setId(controlItem.getId());
|
||||
postId = model.getId();
|
||||
}
|
||||
handleInsertGatherJobs(model, conf);
|
||||
GatherProcess.urlMd5Set.add(urlMD5);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} catch (Exception ex) {
|
||||
// 数据迁移过程可能发生异常情况
|
||||
logger.error(ex);
|
||||
System.exit(0);
|
||||
}
|
||||
|
||||
// 更新游标到本次 EndId+1;
|
||||
pointerDao.updatePointer(conf.getPointerTableName(),
|
||||
sourceTableName, conf.getTargetTableName(), endId + 1);// sourceIdBegin
|
||||
// +
|
||||
// idsIncrement
|
||||
logger.info("current--" + sourceTableName + ": " + endId);
|
||||
}
|
||||
}
|
||||
|
||||
// 处理URL不存在的帖子 插入relative_memos表
|
||||
public void handleInsertGatherPosts(RelativeMemo model, Configure conf) {
|
||||
try {
|
||||
targetDao.insertRelativeMemo(conf.getTargetTableName(),
|
||||
conf.getTargetFields(), model);
|
||||
} catch (Exception e) {
|
||||
logger.error(e);
|
||||
}
|
||||
}
|
||||
|
||||
// 处理URL相同的帖子更新 id表示更新的帖子固定id
|
||||
public void handleUpdateGatherPosts(int id, RelativeMemo model_new) {
|
||||
targetDao.updateRelativeMemo(gatherPostsTableName, model_new, id);// 更新数据relative_memos表
|
||||
}
|
||||
|
||||
// 处理URL不存在的项目 插入gather_projects表
|
||||
public void handleInsertGatherProjects(GatherProject model, Configure conf) {
|
||||
try {
|
||||
targetDao.insertOpenSourceProject(conf.getTargetTableName(),
|
||||
conf.getTargetFields(), model);
|
||||
} catch (Exception e) {
|
||||
logger.error(e);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// 处理URL相同的项目更新 id表示更新的项目固定id
|
||||
public void handleUpdateGatherProjects(int id, GatherProject model_new) {
|
||||
targetDao.updateOpenSourceProject(conf.getTargetTableName(), model_new,
|
||||
id);// 更新数据gather_projects表
|
||||
}
|
||||
|
||||
// 处理URL不存在的项目 插入job_requirements表
|
||||
public void handleInsertGatherJobs(JobRequirement model, Configure conf) {
|
||||
try {
|
||||
targetDao.insertJobRequirement(conf.getTargetTableName(),
|
||||
conf.getTargetFields(), model);
|
||||
} catch (Exception e) {
|
||||
logger.error(e);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// 处理URL相同的项目更新 id表示更新的项目固定id
|
||||
public void handleUpdateGatherJobs(int id, JobRequirement model_new) {
|
||||
targetDao
|
||||
.updateJobRequirement(conf.getTargetTableName(), model_new, id);// 更新数据job_requirements表
|
||||
}
|
||||
}
|
||||
package org.ossean.gather.process;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import javax.annotation.Resource;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.ossean.gather.model.Configure;
|
||||
import org.ossean.gather.model.GatherProject;
|
||||
import org.ossean.gather.model.JobRequirement;
|
||||
import org.ossean.gather.model.PKControlPosts;
|
||||
import org.ossean.gather.model.PKControlProjects;
|
||||
import org.ossean.gather.model.RelativeMemo;
|
||||
import org.ossean.gather.model.Taggings;
|
||||
import org.ossean.gather.sourceDao.GatherDao;
|
||||
import org.ossean.gather.sourceDao.PKControlPostsDao;
|
||||
import org.ossean.gather.sourceDao.PKControlProjectsDao;
|
||||
import org.ossean.gather.targetDao.PointerDao;
|
||||
import org.ossean.gather.targetDao.TargetDao;
|
||||
import org.springframework.context.annotation.Scope;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
@Component("gatherThread")
|
||||
@Scope("prototype")
|
||||
public class GatherThread implements Runnable {
|
||||
private static Logger logger = Logger.getLogger(GatherThread.class);
|
||||
private Configure conf;
|
||||
|
||||
@Resource
|
||||
private GatherDao gatherDao;
|
||||
@Resource
|
||||
private PointerDao pointerDao;
|
||||
@Resource
|
||||
private PKControlPostsDao pkControlPostsDao;
|
||||
@Resource
|
||||
private TargetDao targetDao;
|
||||
@Resource
|
||||
private PKControlProjectsDao pkControlProjectsDao;
|
||||
|
||||
private int idsBegin; // 转移开始Id值
|
||||
private int idsEnd; // 转移结束Id值
|
||||
private int idsIncrement;// 每次转移的Id量
|
||||
|
||||
private int beginId;
|
||||
private int endId;
|
||||
|
||||
private String sourceTableName;
|
||||
private String pkControlPostsTableName = "pk_control_posts";
|
||||
private String pkControlProjectsTableName = "pk_control_projects";
|
||||
private String memoTaggingsTableName = "memo_taggings";
|
||||
private String tagsTableName = "tags";
|
||||
|
||||
private String gatherPostsTableName = "relative_memos";
|
||||
private int maxId;
|
||||
|
||||
public void setParameters(Configure conf, String sourceTableName) {
|
||||
this.conf = conf;
|
||||
this.sourceTableName = sourceTableName;
|
||||
}
|
||||
|
||||
// 读指针
|
||||
public int readPointer(String table, String source, String target) {
|
||||
int pointer = 1;
|
||||
try {
|
||||
pointer = pointerDao.getPointer(table, source, target);
|
||||
} catch (Exception e) {
|
||||
// 表示表中没有数据
|
||||
logger.info("No such pointer! Create one");
|
||||
pointerDao.insertPointer(table, source, target, 1);
|
||||
}
|
||||
return pointer;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void run() {
|
||||
// long start = System.currentTimeMillis();
|
||||
Thread.currentThread().setName(sourceTableName);
|
||||
idsIncrement = conf.getIdsIncrement();
|
||||
idsBegin = readPointer(conf.getPointerTableName(), sourceTableName,
|
||||
conf.getTargetTableName());
|
||||
idsEnd = maxId = gatherDao.getMaxId(sourceTableName);
|
||||
while (idsBegin < idsEnd) {
|
||||
beginId = idsBegin;
|
||||
endId = beginId + idsIncrement - 1; // 取数据时两边都取等号
|
||||
if (endId <= maxId) {
|
||||
handleBatchData(beginId, endId, conf);
|
||||
idsBegin = idsBegin + idsIncrement;
|
||||
} else {
|
||||
endId = maxId; // endId应小于maxId
|
||||
handleBatchData(beginId, endId, conf);
|
||||
break;
|
||||
}
|
||||
}
|
||||
GatherProcess.gatherState.put(sourceTableName, false);
|
||||
// long end = System.currentTimeMillis();
|
||||
// logger.info((end - start) / 6000);
|
||||
}
|
||||
|
||||
public void handleBatchData(int beginId, int endId, Configure conf) {
|
||||
logger.info("BeginId#" + sourceTableName + ":" + beginId);
|
||||
// 表示任务没有完成
|
||||
int maxId = gatherDao.getMaxId(sourceTableName);
|
||||
// 防止转移超过当前最大值的Id数据
|
||||
if (beginId >= 0 && endId > 0 && maxId >= endId) {
|
||||
// 更新执行开始时间
|
||||
logger.info("begin gathering...");
|
||||
// 插入Id段数据,忽略重复值
|
||||
try {
|
||||
String[] sourceFields = conf.getSourceFields().split(",");
|
||||
String[] targetFields = conf.getTargetFields().split(",");
|
||||
String selectItems = "";
|
||||
for (int i = 0; i < sourceFields.length; i++) {
|
||||
String str_source = sourceFields[i];
|
||||
String str_target = targetFields[i];
|
||||
selectItems += str_source + " as " + str_target + ",";
|
||||
}
|
||||
selectItems = selectItems
|
||||
.substring(0, selectItems.length() - 1) + " ";
|
||||
if (conf.getTargetTableName().equals("relative_memos")) {
|
||||
List<RelativeMemo> dataGet = gatherDao.getPostGatherData(
|
||||
sourceTableName, selectItems, beginId, endId,
|
||||
conf.getAndWhere());
|
||||
for (int i = 0; i < dataGet.size(); i++) {
|
||||
RelativeMemo model = dataGet.get(i);
|
||||
String urlMD5 = model.getUrl_md5();// 通过urlMD5判断是不是已经存在该帖子
|
||||
// 是否更新
|
||||
int postId = 0;
|
||||
if(GatherProcess.urlMd5Set.contains(urlMD5)){
|
||||
//urlmd5存在则更新
|
||||
RelativeMemo samePost = targetDao.findPostByUrlMD5(
|
||||
conf.getTargetTableName(), urlMD5);
|
||||
// update gather_projects表中对应的记录,在维持待更新表
|
||||
postId = samePost.getId();
|
||||
model.setId(postId);
|
||||
handleUpdateGatherPosts(samePost.getId(), model);
|
||||
}else{
|
||||
// 不存在 插入
|
||||
PKControlPosts pkControlModel = pkControlPostsDao
|
||||
.selectItemByUrlMD5(
|
||||
pkControlPostsTableName, urlMD5);// 查看有没有固定的id
|
||||
if (pkControlModel != null)
|
||||
model.setId(pkControlModel.getId());
|
||||
else {
|
||||
// 在pk_control_posts表中生成当前项目对应的id
|
||||
pkControlPostsDao.insertOneItem(
|
||||
pkControlPostsTableName, urlMD5);
|
||||
// 查看刚刚插入信息的id
|
||||
PKControlPosts controlItem = pkControlPostsDao
|
||||
.selectItemByUrlMD5(
|
||||
pkControlPostsTableName, urlMD5);
|
||||
// 用id构造model对应的固定不变的id
|
||||
model.setId(controlItem.getId());
|
||||
postId = model.getId();
|
||||
}
|
||||
handleInsertGatherPosts(model, conf);
|
||||
GatherProcess.urlMd5Set.add(urlMD5);
|
||||
}
|
||||
|
||||
// 将tag和项目的关系存入表item_tag_relation 并分离tag
|
||||
String tags = model.getTags();
|
||||
if (tags == null) {
|
||||
// 表示该项目没有标签
|
||||
continue;
|
||||
}
|
||||
List<String> tagList = DataHandler
|
||||
.tagsSegmentation(tags);
|
||||
for (String tag : tagList) {
|
||||
targetDao.insertTag(tagsTableName, tag);// ignore方式插入该项目的标签
|
||||
int tag_id = targetDao.selectTagIdByName(
|
||||
tagsTableName, tag);
|
||||
Taggings taggings = new Taggings();
|
||||
taggings.setTag_id(tag_id);
|
||||
taggings.setTaggable_id(postId);
|
||||
taggings.setTaggable_type("RelativeMemo");
|
||||
taggings.setContext("tags");
|
||||
taggings.setCreated_at(DataHandler.getNow());
|
||||
// 将Taggings对象存入数据库中
|
||||
try {
|
||||
targetDao.insertTaggings(memoTaggingsTableName,
|
||||
taggings);
|
||||
} catch (Exception e) {
|
||||
// 在插入记录之前 relative_memos表中的记录已经被删除掉了
|
||||
logger.error(e);
|
||||
System.exit(0);
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if (conf.getTargetTableName().equals("gather_projects")) {
|
||||
List<GatherProject> dataGet = gatherDao.getPrjGatherData(
|
||||
sourceTableName, selectItems, beginId, endId,
|
||||
conf.getAndWhere());
|
||||
for (int i = 0; i < dataGet.size(); i++) {
|
||||
GatherProject model = dataGet.get(i);
|
||||
String urlMD5 = model.getUrl_md5();// 通过urlMD5判断是不是已经存在该项目
|
||||
// 是否更新
|
||||
int prjId = 0;
|
||||
if(GatherProcess.urlMd5Set.contains(urlMD5)){
|
||||
GatherProject samePrj = targetDao.findPrjByUrlMD5(
|
||||
conf.getTargetTableName(), urlMD5);
|
||||
// update gather_projects表中对应的记录,在维持待更新表
|
||||
prjId = samePrj.getId();
|
||||
model.setId(prjId);
|
||||
model.setUpdate_mark(2);
|
||||
handleUpdateGatherProjects(samePrj.getId(), model);
|
||||
}else{
|
||||
// 不存在 插入
|
||||
PKControlProjects pkControlProjects = pkControlProjectsDao
|
||||
.selectItemByUrlMD5(
|
||||
pkControlProjectsTableName, urlMD5);// 查看有没有固定的id
|
||||
if (pkControlProjects != null)
|
||||
model.setId(pkControlProjects.getId());
|
||||
else {
|
||||
// 在pk_control_posts表中生成当前项目对应的id
|
||||
pkControlProjectsDao.insertOneItem(
|
||||
pkControlProjectsTableName, urlMD5);
|
||||
// 查看刚刚插入信息的id
|
||||
PKControlProjects controlItem = pkControlProjectsDao
|
||||
.selectItemByUrlMD5(
|
||||
pkControlProjectsTableName,
|
||||
urlMD5);
|
||||
// 用id构造model对应的固定不变的id
|
||||
model.setId(controlItem.getId());
|
||||
prjId = model.getId();
|
||||
}
|
||||
model.setUpdate_mark(0);
|
||||
handleInsertGatherProjects(model, conf);
|
||||
GatherProcess.urlMd5Set.add(urlMD5);
|
||||
}
|
||||
|
||||
// // 将tag和项目的关系存入表item_tag_relation 并分离tag
|
||||
// String tags = model.getTags();
|
||||
// if (tags == null) {
|
||||
// // 表示该项目没有标签
|
||||
// continue;
|
||||
// }
|
||||
// List<String> tagList = DataHandler
|
||||
// .tagsSegmentation(tags);
|
||||
// for (String tag : tagList) {
|
||||
// targetDao.insertTag(tagsTableName, tag);// ignore方式插入该项目的标签
|
||||
// int tag_id = targetDao.selectTagIdByName(
|
||||
// tagsTableName, tag);
|
||||
// Taggings taggings = new Taggings();
|
||||
// taggings.setTag_id(tag_id);
|
||||
// taggings.setTaggable_id(prjId);
|
||||
// taggings.setTaggable_type("OpenSourceProject");
|
||||
// taggings.setContext("tags");
|
||||
// taggings.setCreated_at(DataHandler.getNow());
|
||||
// // 将Taggings对象存入数据库中
|
||||
// try {
|
||||
// targetDao.insertTaggings(taggingsTableName,
|
||||
// taggings);
|
||||
// } catch (Exception e) {
|
||||
// // 在插入记录之前 relative_memos表中的记录已经被删除掉了
|
||||
// logger.error(e);
|
||||
// System.exit(0);
|
||||
// }
|
||||
// }
|
||||
}
|
||||
} else {
|
||||
List<JobRequirement> dataGet = gatherDao.getJobGatherData(
|
||||
sourceTableName, selectItems, beginId, endId,
|
||||
conf.getAndWhere());
|
||||
for (int i = 0; i < dataGet.size(); i++) {
|
||||
JobRequirement model = dataGet.get(i);
|
||||
String urlMD5 = model.getUrl_md5();// 通过urlMD5判断是不是已经存在该帖子
|
||||
// 是否更新
|
||||
int postId = 0;
|
||||
if(GatherProcess.urlMd5Set.contains(urlMD5)){
|
||||
JobRequirement sameJob = targetDao.findJobByUrlMD5(
|
||||
conf.getTargetTableName(), urlMD5);
|
||||
// update gather_projects表中对应的记录,在维持待更新表
|
||||
postId = sameJob.getId();
|
||||
model.setId(postId);
|
||||
handleUpdateGatherJobs(sameJob.getId(), model);
|
||||
}else{
|
||||
// 不存在 插入
|
||||
PKControlPosts pkControlPosts = pkControlPostsDao
|
||||
.selectItemByUrlMD5(
|
||||
pkControlPostsTableName, urlMD5);// 查看有没有固定的id
|
||||
if (pkControlPosts != null)
|
||||
model.setId(pkControlPosts.getId());
|
||||
else {
|
||||
// 在pk_control_posts表中生成当前项目对应的id
|
||||
pkControlPostsDao.insertOneItem(
|
||||
pkControlPostsTableName, urlMD5);
|
||||
// 查看刚刚插入信息的id
|
||||
PKControlPosts controlItem = pkControlPostsDao
|
||||
.selectItemByUrlMD5(
|
||||
pkControlPostsTableName, urlMD5);
|
||||
// 用id构造model对应的固定不变的id
|
||||
model.setId(controlItem.getId());
|
||||
postId = model.getId();
|
||||
}
|
||||
handleInsertGatherJobs(model, conf);
|
||||
GatherProcess.urlMd5Set.add(urlMD5);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} catch (Exception ex) {
|
||||
// 数据迁移过程可能发生异常情况
|
||||
logger.error(ex);
|
||||
System.exit(0);
|
||||
}
|
||||
|
||||
// 更新游标到本次 EndId+1;
|
||||
pointerDao.updatePointer(conf.getPointerTableName(),
|
||||
sourceTableName, conf.getTargetTableName(), endId + 1);// sourceIdBegin
|
||||
// +
|
||||
// idsIncrement
|
||||
logger.info("current--" + sourceTableName + ": " + endId);
|
||||
}
|
||||
}
|
||||
|
||||
// 处理URL不存在的帖子 插入relative_memos表
|
||||
public void handleInsertGatherPosts(RelativeMemo model, Configure conf) {
|
||||
try {
|
||||
targetDao.insertRelativeMemo(conf.getTargetTableName(),
|
||||
conf.getTargetFields(), model);
|
||||
} catch (Exception e) {
|
||||
logger.error(e);
|
||||
}
|
||||
}
|
||||
|
||||
// 处理URL相同的帖子更新 id表示更新的帖子固定id
|
||||
public void handleUpdateGatherPosts(int id, RelativeMemo model_new) {
|
||||
targetDao.updateRelativeMemo(gatherPostsTableName, model_new, id);// 更新数据relative_memos表
|
||||
}
|
||||
|
||||
// 处理URL不存在的项目 插入gather_projects表
|
||||
public void handleInsertGatherProjects(GatherProject model, Configure conf) {
|
||||
try {
|
||||
targetDao.insertOpenSourceProject(conf.getTargetTableName(),
|
||||
conf.getTargetFields(), model);
|
||||
} catch (Exception e) {
|
||||
logger.error(e);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// 处理URL相同的项目更新 id表示更新的项目固定id
|
||||
public void handleUpdateGatherProjects(int id, GatherProject model_new) {
|
||||
targetDao.updateOpenSourceProject(conf.getTargetTableName(), model_new,
|
||||
id);// 更新数据gather_projects表
|
||||
}
|
||||
|
||||
// 处理URL不存在的项目 插入job_requirements表
|
||||
public void handleInsertGatherJobs(JobRequirement model, Configure conf) {
|
||||
try {
|
||||
targetDao.insertJobRequirement(conf.getTargetTableName(),
|
||||
conf.getTargetFields(), model);
|
||||
} catch (Exception e) {
|
||||
logger.error(e);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// 处理URL相同的项目更新 id表示更新的项目固定id
|
||||
public void handleUpdateGatherJobs(int id, JobRequirement model_new) {
|
||||
targetDao
|
||||
.updateJobRequirement(conf.getTargetTableName(), model_new, id);// 更新数据job_requirements表
|
||||
}
|
||||
}
|
||||
|
|
|
@ -36,9 +36,9 @@
|
|||
destroy-method="close">
|
||||
<property name="driverClassName" value="com.mysql.jdbc.Driver" />
|
||||
<property name="url"
|
||||
value="jdbc:mysql://172.16.128.30:3306/ossean_production?characterEncoding=UTF-8&zeroDateTimeBehavior=convertToNull&autoReconnect=true" />
|
||||
value="jdbc:mysql://172.16.128.36:3306/ossean_production?characterEncoding=UTF-8&zeroDateTimeBehavior=convertToNull&autoReconnect=true" />
|
||||
<property name="username" value="gather" />
|
||||
<property name="password" value="influx1234" />
|
||||
<property name="password" value="Influx@1234" />
|
||||
<property name="initialSize" value="5" />
|
||||
<property name="maxActive" value="100" />
|
||||
</bean>
|
||||
|
|
|
@ -17,7 +17,6 @@ import org.apache.lucene.index.DirectoryReader;
|
|||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexWriter;
|
||||
import org.apache.lucene.index.IndexWriterConfig;
|
||||
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.queryparser.classic.ParseException;
|
||||
import org.apache.lucene.queryparser.classic.QueryParser;
|
||||
|
@ -36,11 +35,9 @@ import org.wltea.analyzer.lucene.IKAnalyzer;
|
|||
|
||||
import com.ossean.match.dao.ProjectDao;
|
||||
import com.ossean.match.dao.RelativeMemoDao;
|
||||
import com.ossean.match.matchprocess.CountFrequency;
|
||||
import com.ossean.match.matchprocess.MatchIncrement;
|
||||
import com.ossean.match.model.Project;
|
||||
import com.ossean.match.model.RelativeMemo;
|
||||
import com.ossean.match.model.Tag;
|
||||
import com.ossean.match.utils.Normalizer;
|
||||
|
||||
@Component("luceneindex")
|
||||
|
@ -84,36 +81,6 @@ public class LuceneIndex {
|
|||
iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
|
||||
IndexWriter iw = new IndexWriter(dire, iwc);
|
||||
return iw;
|
||||
}
|
||||
|
||||
public static void buildTagIndex(List<Tag> tags) {
|
||||
Logger logger1 = LoggerFactory.getLogger(LuceneIndex.class);
|
||||
Directory dir;
|
||||
Analyzer analyzer = new IKAnalyzer(true);
|
||||
try {
|
||||
dir = FSDirectory.open(Paths.get(CountFrequency.tagIndexDir));
|
||||
IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
|
||||
iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
|
||||
IndexWriter writer = new IndexWriter(dir, iwc);
|
||||
for (Tag tag : tags) {
|
||||
// 对标签建立索引
|
||||
Document doc = new Document();
|
||||
String name = tag.getName().toLowerCase();
|
||||
// 标签原始名字进行存储
|
||||
doc.add(new StringField("name", "" + name, Field.Store.YES));
|
||||
doc.add(new StringField("id", "" + tag.getId(), Field.Store.YES));
|
||||
// 对标签进行分词
|
||||
doc.add(new TextField("items", Normalizer.normalize(name),
|
||||
Field.Store.NO));
|
||||
writer.addDocument(doc);
|
||||
}
|
||||
writer.close();
|
||||
|
||||
} catch (IOException e) {
|
||||
logger1.error("buildTagIndex IOException: " + e);
|
||||
}
|
||||
// Analyzer analyzer = new SimpleAnalyzer();
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -172,9 +139,6 @@ public class LuceneIndex {
|
|||
endId += step;
|
||||
}
|
||||
}
|
||||
matchIncrement.setPrjHistory(0);
|
||||
matchIncrement.setPrjIndexed(maxPrjId);
|
||||
matchIncrement.writeIntoRecord();
|
||||
writer.commit();
|
||||
writer.close();
|
||||
}
|
||||
|
@ -225,9 +189,7 @@ public class LuceneIndex {
|
|||
endId += step;
|
||||
}
|
||||
}
|
||||
matchIncrement.setPrjHistory(0);
|
||||
matchIncrement.setMemoHistory(lastMemoId);
|
||||
matchIncrement.setMemoIndexed(lastMemoId);
|
||||
matchIncrement.writeIntoRecord();
|
||||
/*FileOutputStream fout = new FileOutputStream(new File("record.txt"));
|
||||
fout.write((0 + "\t" + lastMemoId + "\t" + 0 + "\t" + 0).getBytes()); //初始化record.txt,项目初始id为0,帖子初始id为创建索引的最后一个帖子的id
|
||||
|
@ -256,7 +218,7 @@ public class LuceneIndex {
|
|||
continue;
|
||||
}
|
||||
Document doc = new Document();
|
||||
doc.add(new StringField(memoIdFieldName, String.valueOf(rsId), Store.YES));
|
||||
doc.add(new StringField(memoIdFieldName, String.valueOf(rsId) + "", Store.YES));
|
||||
doc.add(new TextField(titleFieldName, memo.getTitle(), Store.NO));
|
||||
String memoTagsString = memo.getTags();
|
||||
if (memoTagsString == null) {
|
||||
|
@ -266,7 +228,7 @@ public class LuceneIndex {
|
|||
for(String memoTag : memoTagsList){
|
||||
doc.add(new StringField(memoTagsFieldName, memoTag, Store.NO));
|
||||
}
|
||||
indexWriter.addDocument(doc);
|
||||
indexWriter.updateDocument(new Term(memoIdFieldName, String.valueOf(rsId) + ""), doc);
|
||||
}
|
||||
indexWriter.commit();
|
||||
indexWriter.close();
|
||||
|
|
|
@ -7,11 +7,8 @@ import java.util.List;
|
|||
import javax.annotation.Resource;
|
||||
|
||||
import org.apache.lucene.document.Document;
|
||||
import org.apache.lucene.index.FieldInvertState;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.Term;
|
||||
import org.apache.lucene.queryparser.classic.ParseException;
|
||||
import org.apache.lucene.queryparser.classic.QueryParser;
|
||||
import org.apache.lucene.search.BooleanClause;
|
||||
import org.apache.lucene.search.BooleanQuery;
|
||||
import org.apache.lucene.search.Explanation;
|
||||
|
@ -24,10 +21,8 @@ import org.apache.lucene.search.similarities.DefaultSimilarity;
|
|||
import org.apache.lucene.search.similarities.Similarity;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.wltea.analyzer.lucene.IKAnalyzer;
|
||||
|
||||
import com.ossean.match.dao.ProjectDao;
|
||||
import com.ossean.match.model.Project;
|
||||
import com.ossean.match.utils.Normalizer;
|
||||
|
||||
public class LuceneSearch {
|
||||
|
@ -89,7 +84,7 @@ public class LuceneSearch {
|
|||
}
|
||||
query.add(tq, BooleanClause.Occur.SHOULD);
|
||||
}
|
||||
TopDocs td = is.search(query, 3);
|
||||
TopDocs td = is.search(query, 10000);
|
||||
ScoreDoc[] sds = td.scoreDocs;
|
||||
for (ScoreDoc sd : sds) {
|
||||
Document d = is.doc(sd.doc);
|
||||
|
@ -98,10 +93,6 @@ public class LuceneSearch {
|
|||
for(String prjName : prjNames){
|
||||
if (keyWords.contains(prjName)) {
|
||||
int pId = Integer.parseInt(prjId);
|
||||
Project currentPrj = projectDao.getPrjById(pId);
|
||||
if(currentPrj.getFiltration()==0){
|
||||
|
||||
}
|
||||
if (matchMap.containsKey(pId)) {
|
||||
matchMap.put(pId, matchMap.get(pId) + weight + sd.score/1000);
|
||||
} else
|
||||
|
@ -130,59 +121,45 @@ public class LuceneSearch {
|
|||
*/
|
||||
public static HashMap<Integer, Double> prjToMemoMatchByLucene(
|
||||
String prjName, String searchField, double weight,
|
||||
HashMap<Integer, Double> map, IndexReader memoIndexReader, IndexReader prjIndexReader) {
|
||||
HashMap<Integer, Double> map, IndexReader memoIndexReader) {
|
||||
|
||||
try {
|
||||
IndexSearcher is = new IndexSearcher(memoIndexReader);
|
||||
List<String> prjNameList = Normalizer.getList(prjName);
|
||||
BooleanQuery query = new BooleanQuery();
|
||||
// Similarity similarity = new DefaultSimilarity(){
|
||||
// @Override
|
||||
// public float queryNorm(float sumOfSquaredWeights) {
|
||||
// return 1.0f;
|
||||
// }
|
||||
// @Override
|
||||
// public float lengthNorm(FieldInvertState state) {
|
||||
// return 1.0f;
|
||||
// }
|
||||
// };
|
||||
// is.setSimilarity(similarity);
|
||||
|
||||
for(String prjNameTerm : prjNameList){
|
||||
//Term termForFreq = new Term(LuceneIndex.prjNameFieldName, prjNameTerm);
|
||||
Term term = new Term(searchField, prjNameTerm);
|
||||
TermQuery tq = new TermQuery(term);
|
||||
// double curTermDocFreq = prjIndexReader.docFreq(termForFreq);
|
||||
// if (prjNameList.size() > 1 && curTermDocFreq > 10) {
|
||||
// tq.setBoost((float) (1/(curTermDocFreq/10)));
|
||||
// }
|
||||
// else {
|
||||
// tq.setBoost(1.1f);
|
||||
// }
|
||||
query.add(tq, BooleanClause.Occur.MUST);
|
||||
query.add(tq, BooleanClause.Occur.MUST); //项目名分词后的每个term都必须在帖子中出现
|
||||
}
|
||||
TopDocs td = is.search(query, 1000000);
|
||||
ScoreDoc[] sds = td.scoreDocs;
|
||||
for (ScoreDoc sd : sds) {
|
||||
Document d = is.doc(sd.doc);
|
||||
// if (sd.score >= 0.7) {
|
||||
String postId = d.get(LuceneIndex.memoIdFieldName);
|
||||
int pId = Integer.parseInt(postId);
|
||||
if (map.containsKey(pId)) {
|
||||
map.put(pId, map.get(pId) + weight + sd.score/1000);
|
||||
} else
|
||||
map.put(pId, weight + sd.score/1000);
|
||||
// }
|
||||
}
|
||||
} catch (IOException e) {
|
||||
logger.error("prjToMemoMatchByLucene IOException: " + e);
|
||||
} /*catch (ParseException e) {
|
||||
logger.error("prjToMemoMatchByLucene ParseException: " + e);
|
||||
}*/
|
||||
}
|
||||
return map;
|
||||
}
|
||||
|
||||
public static HashMap<Integer, Double> searchMemoTags(String tagStr, String searchField, double weight,
|
||||
/**
|
||||
* 项目名和项目别名与帖子标签之间的匹配
|
||||
* @param tagStr
|
||||
* @param searchField
|
||||
* @param weight
|
||||
* @param map
|
||||
* @param indexReader
|
||||
* @return
|
||||
*/
|
||||
public static HashMap<Integer, Double> searchMemoTags(String tagStr, String searchField, String idField, double weight,
|
||||
HashMap<Integer, Double> map, IndexReader indexReader) {
|
||||
|
||||
try {
|
||||
|
@ -193,11 +170,11 @@ public class LuceneSearch {
|
|||
ScoreDoc[] sds = td.scoreDocs;
|
||||
for (ScoreDoc sd : sds) {
|
||||
Document d = is.doc(sd.doc);
|
||||
String postId = d.get(LuceneIndex.memoIdFieldName);
|
||||
String postId = d.get(idField);
|
||||
int pId = Integer.parseInt(postId);
|
||||
if (map.containsKey(pId)) {
|
||||
if (map.containsKey(pId)) {
|
||||
map.put(pId, map.get(pId) + weight);
|
||||
} else {
|
||||
} else {
|
||||
map.put(pId, weight);
|
||||
}
|
||||
}
|
||||
|
@ -207,6 +184,16 @@ public class LuceneSearch {
|
|||
return map;
|
||||
}
|
||||
|
||||
/**
|
||||
* 项目标签与帖子标签
|
||||
* @param tagStr
|
||||
* @param searchField
|
||||
* @param idField
|
||||
* @param weight
|
||||
* @param map
|
||||
* @param indexReader
|
||||
* @return
|
||||
*/
|
||||
public static HashMap<Integer, Double> searchByPrjTag(String tagStr, String searchField, String idField, double weight,
|
||||
HashMap<Integer, Double> map, IndexReader indexReader) {
|
||||
|
||||
|
@ -220,7 +207,7 @@ public class LuceneSearch {
|
|||
Document d = is.doc(sd.doc);
|
||||
String postId = d.get(idField);
|
||||
int pId = Integer.parseInt(postId);
|
||||
if (map.containsKey(pId)) {
|
||||
if (map.containsKey(pId)) { //有项目名和项目别名的匹配时才加入标签匹配的结果
|
||||
map.put(pId, map.get(pId) + weight);
|
||||
}
|
||||
}
|
||||
|
@ -230,6 +217,16 @@ public class LuceneSearch {
|
|||
return map;
|
||||
}
|
||||
|
||||
/**
|
||||
* 项目标签搜索帖子标题
|
||||
* @param tagStr
|
||||
* @param searchField
|
||||
* @param idField
|
||||
* @param weight
|
||||
* @param map
|
||||
* @param indexReader
|
||||
* @return
|
||||
*/
|
||||
public static HashMap<Integer, Double> searchByPrjTagInMemoTitle(String tagStr, String searchField, String idField, double weight,
|
||||
HashMap<Integer, Double> map, IndexReader indexReader) {
|
||||
|
||||
|
@ -260,34 +257,34 @@ public class LuceneSearch {
|
|||
|
||||
|
||||
// 获得每个帖子匹配到的标签个数
|
||||
public static HashMap<Integer, Integer> tagsMatch(String idField,
|
||||
String tags, String searchField, IndexReader indexReader) {
|
||||
HashMap<Integer, Integer> tagsMatchNum = new HashMap<Integer, Integer>();
|
||||
try {
|
||||
IndexSearcher is = new IndexSearcher(indexReader);
|
||||
QueryParser parser = new QueryParser(searchField, new IKAnalyzer(true));
|
||||
Query query = parser.parse(tags);
|
||||
TopDocs td = is.search(query, 100000);
|
||||
ScoreDoc[] sds = td.scoreDocs;
|
||||
for (ScoreDoc sd : sds) {
|
||||
Document d = is.doc(sd.doc);
|
||||
String postId = d.get(idField);
|
||||
int pId = Integer.parseInt(postId);
|
||||
Explanation explanation = is.explain(query, sd.doc);
|
||||
int hitNum = getHitTermsNum(explanation);
|
||||
if (tagsMatchNum.containsKey(pId)) {
|
||||
tagsMatchNum.put(pId, tagsMatchNum.get(pId) + hitNum);
|
||||
} else
|
||||
tagsMatchNum.put(pId, hitNum);
|
||||
}
|
||||
} catch (IOException e) {
|
||||
logger.error("tagsMatch IOException: " + e);
|
||||
} catch (ParseException e) {
|
||||
logger.error("tagsMatch ParseException: " + e);
|
||||
}
|
||||
|
||||
return tagsMatchNum;
|
||||
}
|
||||
// public static HashMap<Integer, Integer> tagsMatch(String idField,
|
||||
// String tags, String searchField, IndexReader indexReader) {
|
||||
// HashMap<Integer, Integer> tagsMatchNum = new HashMap<Integer, Integer>();
|
||||
// try {
|
||||
// IndexSearcher is = new IndexSearcher(indexReader);
|
||||
// QueryParser parser = new QueryParser(searchField, new IKAnalyzer(true));
|
||||
// Query query = parser.parse(tags);
|
||||
// TopDocs td = is.search(query, 100000);
|
||||
// ScoreDoc[] sds = td.scoreDocs;
|
||||
// for (ScoreDoc sd : sds) {
|
||||
// Document d = is.doc(sd.doc);
|
||||
// String postId = d.get(idField);
|
||||
// int pId = Integer.parseInt(postId);
|
||||
// Explanation explanation = is.explain(query, sd.doc);
|
||||
// int hitNum = getHitTermsNum(explanation);
|
||||
// if (tagsMatchNum.containsKey(pId)) {
|
||||
// tagsMatchNum.put(pId, tagsMatchNum.get(pId) + hitNum);
|
||||
// } else
|
||||
// tagsMatchNum.put(pId, hitNum);
|
||||
// }
|
||||
// } catch (IOException e) {
|
||||
// logger.error("tagsMatch IOException: " + e);
|
||||
// } catch (ParseException e) {
|
||||
// logger.error("tagsMatch ParseException: " + e);
|
||||
// }
|
||||
//
|
||||
// return tagsMatchNum;
|
||||
// }
|
||||
|
||||
//get the number of terms hitted in docs
|
||||
public static int getHitTermsNum(Explanation explanation){
|
||||
|
|
|
@ -9,7 +9,6 @@ import org.springframework.context.support.ClassPathXmlApplicationContext;
|
|||
import org.springframework.stereotype.Component;
|
||||
|
||||
import com.ossean.match.lucene.LuceneIndex;
|
||||
import com.ossean.match.matchprocess.CountFrequency;
|
||||
import com.ossean.match.matchprocess.MatchIncrement;
|
||||
import com.ossean.match.matchprocess.MemoToPrjMatch;
|
||||
import com.ossean.match.matchprocess.NewPrjMonitor;
|
||||
|
@ -29,10 +28,6 @@ public class Main {
|
|||
@Autowired
|
||||
private PrjToMemoMatch prjToMemoMatch;
|
||||
|
||||
@Qualifier("countfrequency")
|
||||
@Autowired
|
||||
private CountFrequency countFrequency;
|
||||
|
||||
@Qualifier("luceneindex")
|
||||
@Autowired
|
||||
private LuceneIndex luceneIndex;
|
||||
|
@ -49,31 +44,17 @@ public class Main {
|
|||
public void start() throws InterruptedException, ParseException {
|
||||
while (true) {
|
||||
MatchIncrement matchIncrement = new MatchIncrement();
|
||||
if (matchIncrement.getTaskToPrjId() != 0) { //判断是否设置了截止项目id
|
||||
prjToMemoMatch.setMatchIncrement(matchIncrement);
|
||||
countFrequency.setMatchIncrement(matchIncrement);
|
||||
if (matchIncrement.getPrjHistory() == 0 && matchIncrement.getMemoHistory() == 0) {
|
||||
luceneIndex.run(matchIncrement);
|
||||
}
|
||||
newPrjMonitor.run();
|
||||
countFrequency.run();
|
||||
prjToMemoMatch.run();
|
||||
}
|
||||
else {
|
||||
newPrjMonitor.setMatchIncrement(matchIncrement);
|
||||
memoToPrjMatch.setMatchIncrement(matchIncrement);
|
||||
prjToMemoMatch.setMatchIncrement(matchIncrement);
|
||||
countFrequency.setMatchIncrement(matchIncrement);
|
||||
|
||||
if (matchIncrement.getPrjHistory() == 0 && matchIncrement.getMemoHistory() == 0) {
|
||||
luceneIndex.run(matchIncrement);
|
||||
}
|
||||
|
||||
newPrjMonitor.run();
|
||||
countFrequency.run();
|
||||
prjToMemoMatch.run();
|
||||
memoToPrjMatch.run();
|
||||
newPrjMonitor.setMatchIncrement(matchIncrement);
|
||||
memoToPrjMatch.setMatchIncrement(matchIncrement);
|
||||
prjToMemoMatch.setMatchIncrement(matchIncrement);
|
||||
|
||||
if (matchIncrement.getMemoHistory() == 0) {
|
||||
luceneIndex.run(matchIncrement);
|
||||
}
|
||||
|
||||
newPrjMonitor.run();
|
||||
prjToMemoMatch.run();
|
||||
memoToPrjMatch.run();
|
||||
if (matchIncrement.getSleepTime() > 0) {
|
||||
logger.info(".........sleeping.........." + matchIncrement.getSleepTime()/1000 + "s......");
|
||||
}
|
||||
|
|
|
@ -1,364 +0,0 @@
|
|||
package com.ossean.match.matchprocess;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.nio.file.Paths;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import javax.annotation.Resource;
|
||||
|
||||
import org.apache.lucene.analysis.Analyzer;
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.queryparser.classic.ParseException;
|
||||
import org.apache.lucene.queryparser.classic.QueryParser;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.ScoreDoc;
|
||||
import org.apache.lucene.search.TopDocs;
|
||||
import org.apache.lucene.store.Directory;
|
||||
import org.apache.lucene.store.FSDirectory;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.stereotype.Component;
|
||||
import org.wltea.analyzer.lucene.IKAnalyzer;
|
||||
|
||||
import com.ossean.match.dao.AtomicItemDao;
|
||||
import com.ossean.match.dao.ProjectDao;
|
||||
import com.ossean.match.dao.TagDao;
|
||||
import com.ossean.match.lucene.LuceneIndex;
|
||||
import com.ossean.match.model.Atom;
|
||||
import com.ossean.match.model.Project;
|
||||
import com.ossean.match.model.Tag;
|
||||
import com.ossean.match.pipeline.AtomicItemPipeline;
|
||||
import com.ossean.match.pipeline.ResultPipeline;
|
||||
import com.ossean.match.utils.Extractor;
|
||||
import com.ossean.match.utils.Normalizer;
|
||||
import com.ossean.match.utils.SimilarityCounter;
|
||||
|
||||
@Component("countfrequency")
|
||||
public class CountFrequency {
|
||||
|
||||
@Resource
|
||||
private AtomicItemPipeline atomicItemPipeline;
|
||||
@Resource
|
||||
private AtomicItemDao atomicItemDao;
|
||||
@Resource
|
||||
private ProjectDao projectDao;
|
||||
@Resource
|
||||
private TagDao tagDao;
|
||||
@Resource
|
||||
private ResultPipeline resultPipeline;
|
||||
|
||||
public static List<Atom> atomList;
|
||||
// = atomicItemDao.selectAllAtom();
|
||||
|
||||
public static Map<String, ArrayList<Integer>> atoms;
|
||||
// = transformAtoms(atomList);
|
||||
|
||||
public static String tagIndexDir = "tagIndexDir";
|
||||
|
||||
private Logger logger = LoggerFactory.getLogger(getClass());
|
||||
MatchIncrement matchIncrement = null;
|
||||
|
||||
public void setMatchIncrement(MatchIncrement matchIncrement) {
|
||||
this.matchIncrement = matchIncrement;
|
||||
}
|
||||
|
||||
private static Map<String, ArrayList<Integer>> transformAtoms(
|
||||
List<Atom> atomList) {
|
||||
// TODO Auto-generated method stub
|
||||
ArrayList<Integer> nums = null;
|
||||
Map<String, ArrayList<Integer>> atoms = new HashMap<String, ArrayList<Integer>>();
|
||||
String name = null;
|
||||
for (Atom atom : atomList) {
|
||||
nums = new ArrayList<Integer>(2);
|
||||
nums.add(atom.getNumTag());
|
||||
nums.add(atom.getNumInProj());
|
||||
nums.add(atom.getStatus());
|
||||
name = atom.getName();
|
||||
atoms.put(name, nums);
|
||||
}
|
||||
return atoms;
|
||||
}
|
||||
|
||||
private List<Atom> retransformAtoms(Map<String, ArrayList<Integer>> atoms) {
|
||||
List<Atom> atomList = new ArrayList<Atom>();
|
||||
Set<String> items = atoms.keySet();
|
||||
for (String item : items) {
|
||||
Atom atom = new Atom();
|
||||
atom.setName(item);
|
||||
atom.setNumInTag(atoms.get(item).get(0));
|
||||
atom.setNumInProj(atoms.get(item).get(1));
|
||||
atom.setStatus(atoms.get(item).get(2));
|
||||
atomList.add(atom);
|
||||
}
|
||||
return atomList;
|
||||
}
|
||||
|
||||
private void extractTags(List<Tag> tags,
|
||||
Map<String, ArrayList<Integer>> atoms) {
|
||||
for (Tag tag : tags) {
|
||||
List<String> items = Extractor.extractAtoms(tag.getName());
|
||||
logger.info("extract tag : " + tag.getId() + ","
|
||||
+ tag.getName() + ">>" + items);
|
||||
saveAtoms(items, atoms, 0);
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
private void extractProjects(List<Project> projects,
|
||||
Map<String, ArrayList<Integer>> atoms) {
|
||||
for (Project project : projects) {
|
||||
List<String> items = Extractor.extractAtoms(project.getName());
|
||||
logger.info("extract project : " + project.getId() + ","
|
||||
+ project.getName() + ">>" + items);
|
||||
saveAtoms(items, atoms, 1);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* @param items 抽取元素
|
||||
* @param atoms 存储抽取到的元素
|
||||
* @param pos 用来标注项目和标签,1为项目,0为标签
|
||||
*/
|
||||
private void saveAtoms(List<String> items,
|
||||
Map<String, ArrayList<Integer>> atoms, int pos) {
|
||||
|
||||
ArrayList<Integer> nums = null;
|
||||
for (String item : items) {
|
||||
if (atoms.containsKey(item)) {
|
||||
nums = atoms.get(item);
|
||||
if(nums.get(2) == 0) {
|
||||
nums.set(2, 1);
|
||||
}
|
||||
} else {
|
||||
nums = new ArrayList<Integer>(2);
|
||||
nums.add(0);
|
||||
nums.add(0);
|
||||
nums.add(2);
|
||||
}
|
||||
nums.set(pos, nums.get(pos) + 1);
|
||||
atoms.put(item, nums);
|
||||
}
|
||||
}
|
||||
|
||||
// 对项目(projName)和标签进行匹配,并将匹配结果存入数据库
|
||||
private List<String> queryMatch(String projName, int projId, Map<String, ArrayList<Integer>> atoms)
|
||||
throws ParseException {
|
||||
Directory dir;
|
||||
Analyzer analyzer = new IKAnalyzer(true);
|
||||
List<String> resultStrings = new ArrayList<String>();
|
||||
try {
|
||||
dir = FSDirectory.open(Paths.get(CountFrequency.tagIndexDir));
|
||||
IndexReader reader = DirectoryReader.open(dir);
|
||||
IndexSearcher searcher = new IndexSearcher(reader);
|
||||
QueryParser parser = new QueryParser("items", analyzer);
|
||||
Query query = null;
|
||||
if (projName != null && !"".equals(Normalizer.normalize(projName))) {
|
||||
query = parser.parse(QueryParser.escape(Normalizer
|
||||
.normalize(projName)));
|
||||
|
||||
TopDocs hits = searcher.search(query, 1000);
|
||||
if (hits.scoreDocs != null) {
|
||||
for (ScoreDoc sc : hits.scoreDocs) {
|
||||
int docNum = sc.doc;
|
||||
int tagId = Integer.parseInt(searcher.doc(docNum).get(
|
||||
"id"));
|
||||
String tagName = searcher.doc(docNum).get("name");
|
||||
// System.out.println("match: " + projId + " - " + tagId
|
||||
// +
|
||||
// "("
|
||||
// + tagName + ")");
|
||||
// System.out.println("completely matched tag >>> id: "
|
||||
// + tagId + ",name: " + tagOriginalName);
|
||||
List<String> itemsOfPrj = Extractor
|
||||
.extractAtoms(projName);
|
||||
List<String> itemsOfTag = Extractor
|
||||
.extractAtoms(tagName);
|
||||
|
||||
int prjWeight[] = new int[itemsOfPrj.size()];
|
||||
int tagWeight[] = new int[itemsOfTag.size()];
|
||||
|
||||
int i = 0;
|
||||
for (String item : itemsOfPrj) {
|
||||
prjWeight[i++] = atoms.get(item).get(1);
|
||||
}
|
||||
i = 0;
|
||||
for (String item : itemsOfTag) {
|
||||
tagWeight[i++] = atoms.get(item).get(0);
|
||||
}
|
||||
|
||||
float score = SimilarityCounter.countSimilarity(
|
||||
itemsOfPrj, prjWeight, itemsOfTag, tagWeight);
|
||||
float EPSINON = 0.999F;
|
||||
if (!((score >= -EPSINON) && (score <= EPSINON))) {
|
||||
resultPipeline.insertResult3(projId, projName,
|
||||
tagId, tagName, score);
|
||||
resultStrings.add(tagName);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} catch (IOException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
return resultStrings;
|
||||
}
|
||||
|
||||
// 对项目(projName)和新标签进行匹配,并将匹配结果存入数据库
|
||||
private List<String> queryMatchNewTag(String tagName, int tagId, Map<String, ArrayList<Integer>> atoms)
|
||||
throws ParseException {
|
||||
// Directory dir;
|
||||
tagName = tagName.trim();
|
||||
Analyzer analyzer = new IKAnalyzer(true);
|
||||
List<String> resultStrings = new ArrayList<String>();
|
||||
try {
|
||||
Directory dire = FSDirectory.open(Paths
|
||||
.get(LuceneIndex.PRJS_INDEX_PATH));
|
||||
IndexReader indexReader = DirectoryReader.open(dire);
|
||||
|
||||
// dir = FSDirectory.open(Paths.get(CountFrequency.tagIndexDir));
|
||||
// IndexReader reader = DirectoryReader.open(dir);
|
||||
IndexSearcher searcher = new IndexSearcher(indexReader);
|
||||
QueryParser parser = new QueryParser("items", analyzer);
|
||||
Query query = null;
|
||||
if (tagName != null && !"".equals(Normalizer.normalize(tagName))) {
|
||||
query = parser.parse(QueryParser.escape(Normalizer
|
||||
.normalize(tagName)));
|
||||
TopDocs hits = searcher.search(query, 1000);
|
||||
|
||||
if (hits.scoreDocs != null) {
|
||||
for (ScoreDoc sc : hits.scoreDocs) {
|
||||
int docNum = sc.doc;
|
||||
int projId = Integer.parseInt(searcher.doc(docNum).get(
|
||||
LuceneIndex.prjIdFieldName));
|
||||
String projName = searcher.doc(docNum).get(
|
||||
LuceneIndex.prjNameFieldName);
|
||||
// System.out.println("match: " + projId + " - " + tagId
|
||||
// +
|
||||
// "("
|
||||
// + tagName + ")");
|
||||
// System.out.println("completely matched tag >>> id: "
|
||||
// + tagId + ",name: " + tagOriginalName);
|
||||
List<String> itemsOfPrj = Extractor
|
||||
.extractAtoms(projName);
|
||||
List<String> itemsOfTag = Extractor
|
||||
.extractAtoms(tagName);
|
||||
|
||||
int prjWeight[] = new int[itemsOfPrj.size()];
|
||||
int tagWeight[] = new int[itemsOfTag.size()];
|
||||
|
||||
int i = 0;
|
||||
for (String item : itemsOfPrj) {
|
||||
prjWeight[i++] = atoms.get(item).get(1);
|
||||
}
|
||||
i = 0;
|
||||
for (String item : itemsOfTag) {
|
||||
tagWeight[i++] = atoms.get(item).get(0);
|
||||
}
|
||||
|
||||
float score = SimilarityCounter.countSimilarity(
|
||||
itemsOfPrj, prjWeight, itemsOfTag, tagWeight);
|
||||
float EPSINON = 0.999F;
|
||||
if (!((score >= -EPSINON) && (score <= EPSINON))) {
|
||||
// TODO pipeline
|
||||
resultPipeline.insertResult3(projId, projName,
|
||||
tagId, tagName, score);
|
||||
resultStrings.add(projName);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} catch (IOException e) {
|
||||
// TODO Auto-generated catch block
|
||||
e.printStackTrace();
|
||||
}
|
||||
return resultStrings;
|
||||
}
|
||||
|
||||
public void run() throws ParseException {
|
||||
atomList = atomicItemDao.selectAllAtom();
|
||||
atoms = transformAtoms(atomList);
|
||||
if (matchIncrement.getPrePrjHistory() == 0
|
||||
&& matchIncrement.getTagHistory() == 0) {
|
||||
List<Tag> tags = tagDao.getBatchTag(matchIncrement.getTagHistory(),
|
||||
matchIncrement.getLastTagId());
|
||||
// 建立索引
|
||||
LuceneIndex.buildTagIndex(tags);
|
||||
|
||||
List<Project> projects = projectDao.getBatchPrjsIncre(2000000);
|
||||
|
||||
// 抽取项目
|
||||
extractProjects(projects, atoms);
|
||||
matchIncrement.getMatchRecord();
|
||||
matchIncrement.setPrePrjHistory(matchIncrement.getLastPrjId());
|
||||
|
||||
// 抽取标签
|
||||
extractTags(tags, atoms);
|
||||
matchIncrement.setTagHistory(matchIncrement.getLastTagId());
|
||||
|
||||
// atomList.clear();
|
||||
// atomList = retransformAtoms(atoms);
|
||||
// atomicItemPipeline.pipelineAtoms(atomList);
|
||||
|
||||
// 新项目与标签匹配
|
||||
for (Project project : projects) {
|
||||
queryMatch(project.getName().trim(),
|
||||
project.getId(), atoms);
|
||||
}
|
||||
matchIncrement.writeIntoRecord();
|
||||
} else {
|
||||
if (matchIncrement.isNewPrePrjFlag()) {
|
||||
logger.info("start count frequency of projects match!!!");
|
||||
List<Project> projects = projectDao.getBatchPrjs(
|
||||
matchIncrement.getPrjHistory(),
|
||||
matchIncrement.getLastPrjId());
|
||||
|
||||
// 抽取项目
|
||||
extractProjects(projects, atoms);
|
||||
matchIncrement.setPrePrjHistory(matchIncrement
|
||||
.getLastPrjId());
|
||||
|
||||
// 新项目与标签匹配
|
||||
for (Project project : projects) {
|
||||
List<String> tags = new ArrayList<String>();
|
||||
tags = queryMatch(project.getName().trim(),
|
||||
project.getId(),atoms);
|
||||
|
||||
}
|
||||
matchIncrement.writeIntoRecord();
|
||||
}
|
||||
if (matchIncrement.isNewTagflag()) {
|
||||
logger.info("start count frequency of projects match!!!");
|
||||
|
||||
List<Tag> tags = tagDao.getBatchTag(
|
||||
matchIncrement.getTagHistory(),
|
||||
matchIncrement.getLastTagId());
|
||||
// 建立索引
|
||||
LuceneIndex.buildTagIndex(tags);
|
||||
// 抽取项目
|
||||
extractTags(tags, atoms);
|
||||
matchIncrement.setTagHistory(matchIncrement.getLastTagId());
|
||||
|
||||
// 新标签与项目匹配
|
||||
for (Tag tag : tags) {
|
||||
List<String> projects = new ArrayList<String>();
|
||||
projects = queryMatchNewTag(tag.getName().trim(),
|
||||
tag.getId(), atoms);
|
||||
|
||||
}
|
||||
matchIncrement.writeIntoRecord();
|
||||
}
|
||||
}
|
||||
atomList.clear();
|
||||
atomList = retransformAtoms(atoms);
|
||||
atomicItemPipeline.pipelineAtoms(atomList);
|
||||
}
|
||||
|
||||
}
|
|
@ -5,7 +5,6 @@ import java.nio.file.Paths;
|
|||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import org.apache.lucene.index.DirectoryReader;
|
||||
import org.apache.lucene.index.IndexReader;
|
||||
|
@ -24,7 +23,6 @@ import com.ossean.match.lucene.LuceneSearch;
|
|||
import com.ossean.match.model.Project;
|
||||
import com.ossean.match.model.RelativeMemo;
|
||||
import com.ossean.match.model.RelativeMemoToOpenSourceProject;
|
||||
import com.ossean.match.pipeline.ResultPipeline;
|
||||
import com.ossean.match.utils.Normalizer;
|
||||
|
||||
import javax.annotation.Resource;
|
||||
|
@ -38,8 +36,6 @@ public class Match {
|
|||
private double titleWeight = 0.8;
|
||||
private double threshholdWeight = 1.3;
|
||||
|
||||
@Resource
|
||||
private ResultPipeline resultPipeline;
|
||||
@Resource
|
||||
private RelativeMemoToOpenSourceProjectDao matchResultDao;
|
||||
@Resource
|
||||
|
@ -59,14 +55,11 @@ public class Match {
|
|||
try {
|
||||
Directory memoDire = FSDirectory.open(Paths
|
||||
.get(LuceneIndex.MEMOS_INDEX_PATH));
|
||||
Directory prjDire = FSDirectory.open(Paths
|
||||
.get(LuceneIndex.PRJS_INDEX_PATH));
|
||||
if (!DirectoryReader.indexExists(memoDire)) { // if the index directory
|
||||
// doesn't exist
|
||||
return;
|
||||
}
|
||||
IndexReader memoIndexReader = DirectoryReader.open(memoDire);
|
||||
IndexReader prjIndexReader = DirectoryReader.open(prjDire);
|
||||
for(Project project : projects) {
|
||||
int prjId = 0;
|
||||
String prjName = "";
|
||||
|
@ -78,11 +71,14 @@ public class Match {
|
|||
if(project.getTags() != null)
|
||||
prjTags = project.getTags().toLowerCase();
|
||||
prjName = project.getName().toLowerCase();
|
||||
//long start1 = System.currentTimeMillis();
|
||||
// 项目名匹配帖子标题
|
||||
if (prjName.length() >= 2 && !Normalizer.isAllNumber(prjName)) {
|
||||
// 项目名匹配帖子标题
|
||||
matchMap = LuceneSearch.prjToMemoMatchByLucene(prjName,
|
||||
LuceneIndex.titleFieldName, titleWeight, matchMap, memoIndexReader, prjIndexReader);
|
||||
LuceneIndex.titleFieldName, titleWeight, matchMap, memoIndexReader);
|
||||
// 项目名匹配帖子标签
|
||||
matchMap = LuceneSearch.searchMemoTags(
|
||||
prjName, LuceneIndex.memoTagsFieldName, LuceneIndex.memoIdFieldName, memoTagsWeight,
|
||||
matchMap, memoIndexReader);
|
||||
}
|
||||
if (project.getSynonyms() != null) {
|
||||
synonyms = project.getSynonyms().toLowerCase();
|
||||
|
@ -91,88 +87,39 @@ public class Match {
|
|||
//项目别名匹配帖子标题 TODO
|
||||
for (String synonym : synonymsList) {
|
||||
matchMap = LuceneSearch.prjToMemoMatchByLucene(synonym,
|
||||
LuceneIndex.titleFieldName, titleWeight, matchMap, memoIndexReader, prjIndexReader);
|
||||
LuceneIndex.titleFieldName, titleWeight, matchMap, memoIndexReader);
|
||||
}
|
||||
//项目别名匹配帖子标签 TODO
|
||||
for (String synonym : synonymsList) {
|
||||
matchMap = LuceneSearch.searchMemoTags(
|
||||
synonym, LuceneIndex.memoTagsFieldName, memoTagsWeight,
|
||||
synonym, LuceneIndex.memoTagsFieldName, LuceneIndex.memoIdFieldName, memoTagsWeight,
|
||||
matchMap, memoIndexReader);
|
||||
}
|
||||
}
|
||||
}
|
||||
//long end1 = System.currentTimeMillis();
|
||||
//logger.info("项目名匹配帖子标题: " + (end1 - start1) + "ms");
|
||||
// 项目名匹配帖子标签 TODO :词频统计可直接从索引获得
|
||||
if (prjName.length() >= 2 && !Normalizer.isAllNumber(prjName)) {
|
||||
Set<String> tagStrings = resultPipeline.getTagbyPrjId(prjId); // 这里要返回标签匹配到的项目id;
|
||||
for (String tag : tagStrings) {
|
||||
matchMap = LuceneSearch.searchMemoTags(
|
||||
tag.toLowerCase(), LuceneIndex.memoTagsFieldName, memoTagsWeight,
|
||||
matchMap, memoIndexReader);
|
||||
}
|
||||
}
|
||||
//long end2 = System.currentTimeMillis();
|
||||
//logger.info("项目名匹配帖子标签: " + (end2 - end1) + "ms");
|
||||
// 用项目标签搜索帖子标题
|
||||
//HashMap<Integer, Integer> tagsCount = null;
|
||||
if (prjTags != null && prjTags.length() > 0 && matchMap.size() > 0) {
|
||||
List<String> prjTagsList = Normalizer.tagsSegmentation(prjTags);
|
||||
// 用项目标签搜索帖子标题
|
||||
for (String prjTag : prjTagsList) {
|
||||
matchMap = LuceneSearch.searchByPrjTagInMemoTitle(
|
||||
prjTag, LuceneIndex.titleFieldName, LuceneIndex.memoIdFieldName, prjTagsToMemoTitleWeight,
|
||||
matchMap, memoIndexReader);
|
||||
}
|
||||
/*tagsCount = LuceneSearch.tagsMatch(
|
||||
LuceneIndex.memoIdFieldName, prjTags,
|
||||
LuceneIndex.titleFieldName, indexReader);
|
||||
// tagsCount是<postId, x>,指每个post的标题匹配到的tag数量
|
||||
if (tagsCount.size() > 0) {
|
||||
for (Map.Entry<Integer, Integer> t : tagsCount.entrySet()) {
|
||||
int postId = t.getKey();
|
||||
int x = t.getValue();
|
||||
if (matchMap.containsKey(postId)) {
|
||||
matchMap.put(postId, matchMap.get(postId) + matchMap.get(postId) * 0.5 * (Math.log(x * x + 1) / Math.log(2)));
|
||||
}
|
||||
}
|
||||
}*/
|
||||
//long end3 = System.currentTimeMillis();
|
||||
//logger.info("用项目标签搜索帖子标题: " + (end3 - end2) + "ms");
|
||||
// 用项目标签搜索帖子标签
|
||||
for (String prjTag : prjTagsList) {
|
||||
matchMap = LuceneSearch.searchByPrjTag(
|
||||
prjTag, LuceneIndex.memoTagsFieldName, LuceneIndex.memoIdFieldName, prjTagsToMemoTagsWeight,
|
||||
matchMap, memoIndexReader);
|
||||
}
|
||||
/*tagsCount = LuceneSearch.tagsMatch(
|
||||
LuceneIndex.memoIdFieldName, prjTags,
|
||||
LuceneIndex.memoTagsFieldName, indexReader);
|
||||
if (tagsCount.size() > 0) {
|
||||
for (Map.Entry<Integer, Integer> t : tagsCount
|
||||
.entrySet()) {
|
||||
int postId = t.getKey();
|
||||
int y = t.getValue();
|
||||
if (matchMap.containsKey(postId)) {
|
||||
matchMap.put(postId, matchMap.get(postId) + matchMap.get(postId) * 0.6 * (Math.log(y * y + 1) / Math.log(2)));
|
||||
}
|
||||
}
|
||||
}*/
|
||||
//long end4 = System.currentTimeMillis();
|
||||
//logger.info("用项目标签搜索帖子标签: " + (end4 - end3) + "ms");
|
||||
}
|
||||
//long end5 = System.currentTimeMillis();
|
||||
if (matchMap.size() > 0)
|
||||
insertPrjToMemoMatchResult(prjId, matchMap);
|
||||
matchMap.clear();
|
||||
//long end6 = System.currentTimeMillis();
|
||||
//logger.info("当前项目匹配结果入库: " + (end6 - end5) + "ms");
|
||||
projectDao.updateProcessedPrj(prjId);
|
||||
logger.info("current prjId: " + prjId);
|
||||
}
|
||||
memoIndexReader.close();
|
||||
memoDire.close();
|
||||
prjIndexReader.close();
|
||||
prjDire.close();
|
||||
} catch (IOException e) {
|
||||
logger.error("prjToMemoMatch io error in Match: " + e);
|
||||
}
|
||||
|
@ -263,13 +210,13 @@ public class Match {
|
|||
public void memoToPrjMatch(List<RelativeMemo> memos) {
|
||||
HashMap<Integer, Double> matchMap = new HashMap<Integer, Double>();
|
||||
try {
|
||||
Directory dire = FSDirectory.open(Paths
|
||||
Directory prjDire = FSDirectory.open(Paths
|
||||
.get(LuceneIndex.PRJS_INDEX_PATH));
|
||||
if (!DirectoryReader.indexExists(dire)) { // if the index directory
|
||||
if (!DirectoryReader.indexExists(prjDire)) { // if the index directory
|
||||
// doesn't exist
|
||||
return;
|
||||
}
|
||||
IndexReader indexReader = DirectoryReader.open(dire);
|
||||
IndexReader prjIndexReader = DirectoryReader.open(prjDire);
|
||||
for(RelativeMemo memo : memos) {
|
||||
int memoId = 0;
|
||||
String memoTitle = "";
|
||||
|
@ -288,88 +235,48 @@ public class Match {
|
|||
if (Normalizer.isAllNumber(memoTitle)) {
|
||||
continue;
|
||||
}
|
||||
//long start1 = System.currentTimeMillis();
|
||||
List<String> memoTitleList = Normalizer.getList(memoTitle);
|
||||
if (memoTitleList.size() > 0) {
|
||||
// 帖子标题匹配项目名
|
||||
matchMap = LuceneSearch.memoToPrjMatchByLucene(memoTitle,
|
||||
memoTitleList, LuceneIndex.prjNameFieldName,
|
||||
titleWeight, matchMap, indexReader);
|
||||
titleWeight, matchMap, prjIndexReader);
|
||||
//帖子标题搜索项目别名
|
||||
matchMap = LuceneSearch.memoToPrjMatchByLucene(memoTitle,
|
||||
memoTitleList, LuceneIndex.prjSynonymsFieldName,
|
||||
titleWeight, matchMap, indexReader);
|
||||
titleWeight, matchMap, prjIndexReader);
|
||||
}
|
||||
//long end1 = System.currentTimeMillis();
|
||||
//logger.info("帖子标题匹配项目名: " + (end1 - start1) + "ms");
|
||||
List<String> memoTagsList = Normalizer.tagsSegmentation(memo.getTags().toLowerCase());
|
||||
// 用帖子标签搜索项目名
|
||||
if (memoTags.length() > 0) {
|
||||
Set<Integer> projectIds = resultPipeline.getPrjIdForMemo(memoId); // 这里要返回标签匹配到的项目id;
|
||||
for (int pId : projectIds) {
|
||||
if (matchMap.containsKey(pId)){
|
||||
matchMap.put(pId, matchMap.get(pId) + memoTagsWeight);
|
||||
}
|
||||
else {
|
||||
matchMap.put(pId, memoTagsWeight);
|
||||
}
|
||||
if (memoTags.length() > 0 && memoTagsList.size() > 0) {
|
||||
//用帖子标签搜索项目名
|
||||
for (String memoTag : memoTagsList) {
|
||||
matchMap = LuceneSearch.searchMemoTags(memoTag,
|
||||
LuceneIndex.prjNameFieldName, LuceneIndex.prjIdFieldName, memoTagsWeight, matchMap, prjIndexReader);
|
||||
}
|
||||
//用帖子标签搜索项目别名
|
||||
for (String tag : memoTagsList) {
|
||||
matchMap = LuceneSearch.searchMemoTags(tag,
|
||||
LuceneIndex.prjSynonymsFieldName, LuceneIndex.prjIdFieldName, memoTagsWeight, matchMap, prjIndexReader);
|
||||
}
|
||||
}
|
||||
//long end2 = System.currentTimeMillis();
|
||||
//logger.info("帖子标签搜索项目名: " + (end2 - end1) + "ms");
|
||||
if (matchMap.size() > 0) {
|
||||
// 帖子标题匹配项目标签
|
||||
if (memoTitleList != null && memoTitleList.size() > 0) {
|
||||
for (String memoTitleTerm : memoTitleList) {
|
||||
matchMap = LuceneSearch.searchByPrjTag(
|
||||
memoTitleTerm, LuceneIndex.prjTagsFieldName, LuceneIndex.prjIdFieldName, prjTagsToMemoTitleWeight,
|
||||
matchMap, indexReader);
|
||||
matchMap, prjIndexReader);
|
||||
}
|
||||
}
|
||||
//long end3 = System.currentTimeMillis();
|
||||
//logger.info("帖子标题匹配项目标签: " + (end3 - end2) + "ms");
|
||||
/*HashMap<Integer, Integer> tagsCount = null;
|
||||
tagsCount = LuceneSearch.tagsMatch(
|
||||
LuceneIndex.prjIdFieldName, memoTitle,
|
||||
LuceneIndex.prjTagsFieldName, indexReader);
|
||||
// tagsCount是<prjId, x>,x指匹配到prjTag的数量
|
||||
if (tagsCount.size() > 0) {
|
||||
for (Map.Entry<Integer, Integer> t : tagsCount
|
||||
.entrySet()) {
|
||||
int prjId = t.getKey();
|
||||
int x = t.getValue();
|
||||
if (matchMap.containsKey(prjId)) {
|
||||
matchMap.put(prjId, matchMap.get(prjId) + matchMap.get(prjId) * 0.5
|
||||
* (Math.log(x * x + 1) / Math.log(2)));
|
||||
}
|
||||
}
|
||||
} */
|
||||
|
||||
// 用帖子标签搜索项目标签
|
||||
if (memo.getTags() != null && memo.getTags().length() > 0) {
|
||||
List<String> memoTagsList = Normalizer.tagsSegmentation(memo.getTags().toLowerCase());
|
||||
for (String memoTag : memoTagsList) {
|
||||
matchMap = LuceneSearch.searchByPrjTag(
|
||||
memoTag, LuceneIndex.prjTagsFieldName, LuceneIndex.prjIdFieldName, prjTagsToMemoTagsWeight,
|
||||
matchMap, indexReader);
|
||||
matchMap, prjIndexReader);
|
||||
}
|
||||
}
|
||||
//long end4 = System.currentTimeMillis();
|
||||
//logger.info("帖子标签搜索项目标签: " + (end4 - end3) + "ms");
|
||||
/*if (memoTags.length() > 0) {
|
||||
tagsCount = LuceneSearch.tagsMatch(
|
||||
LuceneIndex.prjIdFieldName, memoTags,
|
||||
LuceneIndex.prjTagsFieldName, indexReader);
|
||||
if (tagsCount.size() > 0) {
|
||||
for (Map.Entry<Integer, Integer> t : tagsCount.entrySet()) {
|
||||
int prjId = t.getKey();
|
||||
int y = t.getValue();
|
||||
if (matchMap.containsKey(prjId)) {
|
||||
matchMap.put(prjId, matchMap.get(prjId) + matchMap.get(prjId) * 0.6
|
||||
* (Math.log(y * y + 1) / Math.log(2)));
|
||||
}
|
||||
}
|
||||
}
|
||||
}*/
|
||||
if (matchMap.size() > 0)
|
||||
insertMemoToPrjMatchResult(memoId, matchMap);
|
||||
}
|
||||
|
@ -377,8 +284,8 @@ public class Match {
|
|||
matchMap.clear();
|
||||
logger.info("current MemoId: " + memoId);
|
||||
}
|
||||
indexReader.close();
|
||||
dire.close();
|
||||
prjIndexReader.close();
|
||||
prjDire.close();
|
||||
} catch (Exception e) {
|
||||
logger.error("memoToPrjMatch error in Match: " + e);
|
||||
}
|
||||
|
|
|
@ -11,21 +11,11 @@ import org.slf4j.LoggerFactory;
|
|||
|
||||
public class MatchIncrement extends Thread implements Runnable {
|
||||
private Logger logger = LoggerFactory.getLogger(getClass());
|
||||
private int prjHistory; // the record of project after matching last time
|
||||
private int memoHistory; // the record of memo after matching last time
|
||||
private int prjIndexed; //the record of prj indexed to
|
||||
private int memoIndexed; //the record of memo indexed to
|
||||
private static File record; // record file "record.txt"
|
||||
private boolean newPrjComing; //新项目进入标识
|
||||
private boolean newMemoComing; //新帖子进入标识
|
||||
private boolean currentMatchDone = true; //当前批量匹配是否结束
|
||||
private int lastPrjId = 0; //最大项目id
|
||||
private int lastTagId = 0; //最大标签id
|
||||
private int tagHistory = 0; //标签匹配记录
|
||||
private boolean newTagflag; //新标签进入标识
|
||||
private boolean newPrePrjFlag; //新的项目需要做标签匹配
|
||||
private int prePrjHistory = 0; //项目与标签匹配记录
|
||||
private int taskToPrjId = 0;
|
||||
private int sleepTime = 0;
|
||||
|
||||
public boolean isNewPrjComing() {
|
||||
|
@ -52,91 +42,11 @@ public class MatchIncrement extends Thread implements Runnable {
|
|||
this.sleepTime = sleepTime;
|
||||
}
|
||||
|
||||
public int getTaskToPrjId() {
|
||||
return taskToPrjId;
|
||||
}
|
||||
|
||||
public void setTaskToPrjId(int taskToPrjId) {
|
||||
this.taskToPrjId = taskToPrjId;
|
||||
}
|
||||
|
||||
public int getMemoIndexed() {
|
||||
return memoIndexed;
|
||||
}
|
||||
|
||||
public void setMemoIndexed(int memoIndexed) {
|
||||
this.memoIndexed = memoIndexed;
|
||||
}
|
||||
|
||||
public int getPrjIndexed() {
|
||||
return prjIndexed;
|
||||
}
|
||||
|
||||
public void setPrjIndexed(int prjIndexed) {
|
||||
this.prjIndexed = prjIndexed;
|
||||
}
|
||||
|
||||
public boolean isNewPrePrjFlag() {
|
||||
return newPrePrjFlag;
|
||||
}
|
||||
|
||||
public void setNewPrePrjFlag(boolean newPrePrjFlag) {
|
||||
this.newPrePrjFlag = newPrePrjFlag;
|
||||
}
|
||||
|
||||
public int getPrePrjHistory() {
|
||||
return prePrjHistory;
|
||||
}
|
||||
|
||||
public void setPrePrjHistory(int prePrjHistory) {
|
||||
this.prePrjHistory = prePrjHistory;
|
||||
}
|
||||
|
||||
public int getTagHistory() {
|
||||
return tagHistory;
|
||||
}
|
||||
|
||||
public void setTagHistory(int tagHistory) {
|
||||
this.tagHistory = tagHistory;
|
||||
}
|
||||
|
||||
public boolean isNewTagflag() {
|
||||
return newTagflag;
|
||||
}
|
||||
|
||||
public void setNewTagflag(boolean newTagflag) {
|
||||
this.newTagflag = newTagflag;
|
||||
}
|
||||
|
||||
public void setLastPrjId(int id) {
|
||||
this.lastPrjId = id;
|
||||
}
|
||||
|
||||
public int getLastPrjId() {
|
||||
return lastPrjId;
|
||||
}
|
||||
|
||||
public int getLastTagId() {
|
||||
return lastTagId;
|
||||
}
|
||||
|
||||
public void setLastTagId(int id) {
|
||||
this.lastTagId = id;
|
||||
}
|
||||
|
||||
public MatchIncrement() {
|
||||
record = new File("record.txt");
|
||||
getMatchRecord();
|
||||
}
|
||||
|
||||
public int getPrjHistory() {
|
||||
return prjHistory;
|
||||
}
|
||||
|
||||
public synchronized void setPrjHistory(int prjHistory) {
|
||||
this.prjHistory = prjHistory;
|
||||
}
|
||||
|
||||
public int getMemoHistory() {
|
||||
return memoHistory;
|
||||
}
|
||||
|
@ -160,13 +70,7 @@ public class MatchIncrement extends Thread implements Runnable {
|
|||
public void getMatchRecord() {
|
||||
try {
|
||||
Scanner in = new Scanner(record);
|
||||
this.setPrjHistory(in.nextInt());
|
||||
this.setMemoHistory(in.nextInt());
|
||||
this.setTagHistory(in.nextInt());
|
||||
this.setPrePrjHistory(in.nextInt());
|
||||
this.setPrjIndexed(in.nextInt());
|
||||
this.setMemoIndexed(in.nextInt());
|
||||
this.setTaskToPrjId(in.nextInt());
|
||||
in.close();
|
||||
} catch (FileNotFoundException e) {
|
||||
logger.error("getMatchRecord FileNotFoundException: " + e);
|
||||
|
@ -179,8 +83,7 @@ public class MatchIncrement extends Thread implements Runnable {
|
|||
public void writeIntoRecord() {
|
||||
try {
|
||||
FileOutputStream fout = new FileOutputStream(record);
|
||||
fout.write((prjHistory + "\t" + memoHistory + "\t" + tagHistory
|
||||
+ "\t" + prePrjHistory + "\t" + prjIndexed + "\t" + memoIndexed + "\t" + taskToPrjId).getBytes());
|
||||
fout.write((memoHistory + "").getBytes());
|
||||
fout.close();
|
||||
} catch (FileNotFoundException e) {
|
||||
logger.error("writeIntoRecord FileNotFoundException: " + e);
|
||||
|
|
|
@ -2,7 +2,6 @@ package com.ossean.match.matchprocess;
|
|||
|
||||
import java.io.IOException;
|
||||
import java.sql.SQLException;
|
||||
import java.text.DecimalFormat;
|
||||
import java.util.List;
|
||||
|
||||
import javax.annotation.Resource;
|
||||
|
@ -41,7 +40,6 @@ public class MemoToPrjMatch implements Runnable {
|
|||
public void memoToPrjIncrement() throws SQLException,
|
||||
IOException {
|
||||
matchIncrement.setCurrentMatchDone(false);
|
||||
long start = System.currentTimeMillis();
|
||||
matchIncrement.getMatchRecord();
|
||||
int startId = matchIncrement.getMemoHistory();
|
||||
int endId = startId + step;
|
||||
|
@ -49,25 +47,15 @@ public class MemoToPrjMatch implements Runnable {
|
|||
if ((lastMemoId - endId) <= step) {
|
||||
endId = lastMemoId;
|
||||
}
|
||||
LuceneIndex luceneIndex = new LuceneIndex();
|
||||
while (startId < lastMemoId) {
|
||||
long batchStart = System.currentTimeMillis();
|
||||
List<RelativeMemo> memos = memoDao.getMemoInfo(startId, endId);
|
||||
if (memos != null && memos.size() > 0) {
|
||||
match.memoToPrjMatch(memos);
|
||||
matchIncrement.setMemoHistory(endId);
|
||||
if (startId > matchIncrement.getMemoIndexed()) {
|
||||
LuceneIndex luceneIndex = new LuceneIndex();
|
||||
int memoIndexed = luceneIndex.memoIndexIncrement(memos);
|
||||
// adding memos index and save the record
|
||||
matchIncrement.setMemoIndexed(memoIndexed);
|
||||
}
|
||||
// adding memos index and save the record
|
||||
luceneIndex.memoIndexIncrement(memos);
|
||||
}
|
||||
long batchEnd = System.currentTimeMillis();
|
||||
DecimalFormat df = new DecimalFormat( "0.00");
|
||||
double totalTime = (double)(batchEnd - batchStart)/60000;
|
||||
double rate = (double)(endId - startId)/((batchEnd - batchStart)/1000);
|
||||
logger.info("current Batch memo-->prj total time: " + df.format(totalTime) + "min; "
|
||||
+ " average: " + df.format(rate) + " 条/s");
|
||||
logger.info("relative_memos_to_osps matched to : "
|
||||
+ matchIncrement.getMemoHistory());
|
||||
logger.info("relative_memos_to_osps remain_to_match : "
|
||||
|
@ -83,13 +71,8 @@ public class MemoToPrjMatch implements Runnable {
|
|||
endId += step;
|
||||
}
|
||||
}
|
||||
long end = System.currentTimeMillis();
|
||||
DecimalFormat df = new DecimalFormat( "0.00");
|
||||
double totalTime = (double)(end - start)/60000;
|
||||
logger.info("memo-->prj total time: " + df.format(totalTime) + "min");
|
||||
matchIncrement.setCurrentMatchDone(true);
|
||||
}
|
||||
|
||||
public void run() {
|
||||
if (matchIncrement.isNewMemoComing()) {
|
||||
logger.info("start relative_memos_to_osps match!!!");
|
||||
|
|
|
@ -26,13 +26,9 @@ public class NewPrjMonitor {
|
|||
|
||||
public void run() {
|
||||
int newPrjCount = projectDao.getNewPrjCount();
|
||||
int lastPrjId = projectDao.getNewLast();
|
||||
int lastMemoId = memoDao.getLastMemoId();
|
||||
int lastTagId = tagDao.getNewLast();
|
||||
if (newPrjCount > 0) {
|
||||
matchIncrement.setNewPrjComing(true);
|
||||
matchIncrement.setLastPrjId(lastPrjId);
|
||||
|
||||
} else {
|
||||
matchIncrement.setNewPrjComing(false);
|
||||
}
|
||||
|
@ -42,19 +38,6 @@ public class NewPrjMonitor {
|
|||
matchIncrement.setNewMemoComing(false);
|
||||
}
|
||||
|
||||
if (newPrjCount > 0) {
|
||||
matchIncrement.setNewPrePrjFlag(true);
|
||||
} else {
|
||||
matchIncrement.setNewPrePrjFlag(false);
|
||||
}
|
||||
|
||||
if (lastTagId > matchIncrement.getTagHistory()) {
|
||||
matchIncrement.setNewTagflag(true);
|
||||
matchIncrement.setLastTagId(lastTagId);
|
||||
|
||||
} else {
|
||||
matchIncrement.setNewTagflag(false);
|
||||
}
|
||||
if (!matchIncrement.isNewMemoComing()&&!matchIncrement.isNewPrjComing()) {
|
||||
matchIncrement.setSleepTime(30000);
|
||||
}else {
|
||||
|
|
|
@ -2,7 +2,6 @@ package com.ossean.match.matchprocess;
|
|||
|
||||
import java.io.IOException;
|
||||
import java.sql.SQLException;
|
||||
import java.text.DecimalFormat;
|
||||
import java.util.List;
|
||||
|
||||
import javax.annotation.Resource;
|
||||
|
@ -34,73 +33,31 @@ public class PrjToMemoMatch implements Runnable {
|
|||
|
||||
public void prjToMemoIncrement() throws IOException, SQLException {
|
||||
matchIncrement.setCurrentMatchDone(false);
|
||||
long start = System.currentTimeMillis();
|
||||
matchIncrement.getMatchRecord();
|
||||
List<Project> newPrjs = projectDao.getBatchPrjsIncre(step);// 批量读取filtration为1或者filtration为2且update_mark不为空的项目。即未匹配的和已匹配但有更新的。
|
||||
if(newPrjs == null || newPrjs.size() == 0){
|
||||
matchIncrement.setPrjHistory(matchIncrement.getLastPrjId());
|
||||
matchIncrement.writeIntoRecord();
|
||||
}
|
||||
// 批量读取filtration为1或者filtration为2且update_mark不为空的项目。即未匹配的和已匹配但有更新的。
|
||||
List<Project> newPrjs = projectDao.getBatchPrjsIncre(step);
|
||||
while (newPrjs != null && newPrjs.size() > 0) {
|
||||
long batchStart = System.currentTimeMillis();
|
||||
long end1 = System.currentTimeMillis();
|
||||
logger.info("批量读项目: " + (end1 - batchStart) + "ms");
|
||||
if (newPrjs != null) {
|
||||
match.prjToMemoMatch(newPrjs);
|
||||
matchIncrement.setPrjHistory(newPrjs.get(newPrjs.size()-1).getId());
|
||||
// adding projects index and save the record
|
||||
// adding projects index
|
||||
LuceneIndex luceneIndex = new LuceneIndex();
|
||||
int prjIndexed = luceneIndex.prjIndexIncrement(newPrjs);
|
||||
matchIncrement.setPrjIndexed(prjIndexed);
|
||||
luceneIndex.prjIndexIncrement(newPrjs);
|
||||
}
|
||||
long batchEnd = System.currentTimeMillis();
|
||||
DecimalFormat df = new DecimalFormat( "0.00");
|
||||
double totalTime = (double)(batchEnd - batchStart)/60000;
|
||||
double rate = (double)(newPrjs.size())/((batchEnd - batchStart)/1000);
|
||||
logger.info("current Batch prj-->memo total time: " + df.format(totalTime) + "min; "
|
||||
+ " average: " + df.format(rate) + " 条/s");
|
||||
// finishing matching, writing the record to "record.txt"
|
||||
matchIncrement.writeIntoRecord();
|
||||
newPrjs = projectDao.getBatchPrjsIncre(step);// getting projects' id and prjName in batches
|
||||
// getting projects' id and prjName in batches
|
||||
newPrjs = projectDao.getBatchPrjsIncre(step);
|
||||
}
|
||||
long end = System.currentTimeMillis();
|
||||
DecimalFormat df = new DecimalFormat( "0.00");
|
||||
double totalTime = (double)(end - start)/60000;
|
||||
logger.info("prj-->memo total time: " + df.format(totalTime) + "min");
|
||||
matchIncrement.setCurrentMatchDone(true);
|
||||
}
|
||||
|
||||
public void run() {
|
||||
if (matchIncrement.getTaskToPrjId() != 0) {
|
||||
int startId = matchIncrement.getPrjHistory();
|
||||
int endId = matchIncrement.getTaskToPrjId();
|
||||
long batchStart = System.currentTimeMillis();
|
||||
List<Project> newPrjs = projectDao.getBatchPrjs(startId, endId);// getting projects' id and prjName in batches
|
||||
long end1 = System.currentTimeMillis();
|
||||
logger.info("批量读项目: " + (end1 - batchStart) + "ms");
|
||||
if (newPrjs != null) {
|
||||
match.prjToMemoMatch(newPrjs);
|
||||
}
|
||||
long batchEnd = System.currentTimeMillis();
|
||||
DecimalFormat df = new DecimalFormat( "0.00");
|
||||
double totalTime = (double)(batchEnd - batchStart)/60000;
|
||||
double rate = (double)(endId - startId)/((batchEnd - batchStart)/1000);
|
||||
logger.info("current Batch prj-->memo total time: " + df.format(totalTime) + "min; "
|
||||
+ " average: " + df.format(rate) + " 条/s");
|
||||
logger.info("osps_to_relative_memos matched to : "
|
||||
+ matchIncrement.getPrjHistory());
|
||||
System.exit(0);
|
||||
}
|
||||
else {
|
||||
if (matchIncrement.isNewPrjComing()) {
|
||||
logger.info("start osps_to_relative_memos match!!!");
|
||||
try {
|
||||
prjToMemoIncrement();
|
||||
} catch (IOException e) {
|
||||
logger.error("IOException: " + e);
|
||||
} catch (SQLException e) {
|
||||
logger.error("SQLException: " + e);
|
||||
}
|
||||
if (matchIncrement.isNewPrjComing()) {
|
||||
logger.info("start osps_to_relative_memos match!!!");
|
||||
try {
|
||||
prjToMemoIncrement();
|
||||
} catch (IOException e) {
|
||||
logger.error("IOException: " + e);
|
||||
} catch (SQLException e) {
|
||||
logger.error("SQLException: " + e);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -1,68 +0,0 @@
|
|||
package com.ossean.match.model;
|
||||
|
||||
/**
 * Mutable bean holding nine values, each exposed through a getter/setter pair.
 * A freshly constructed instance carries the Java defaults (0 for the int
 * fields, null for the String fields).
 *
 * NOTE(review): the snake_case field names presumably mirror the columns of a
 * taggings table -- confirm against the DAO/mapper that populates this class.
 */
public class Taggings {

    // Integer-valued fields.
    private int id;
    private int tag_id;
    private int taggable_id;
    private int tagger_id;
    private int disagree_num;

    // Text-valued fields.
    private String taggable_type;
    private String tagger_type;
    private String context;
    private String created_at;

    /** @return the stored {@code id} */
    public int getId() {
        return this.id;
    }

    /** @param newId value to store in {@code id} */
    public void setId(int newId) {
        this.id = newId;
    }

    /** @return the stored {@code tag_id} */
    public int getTag_id() {
        return this.tag_id;
    }

    /** @param newTagId value to store in {@code tag_id} */
    public void setTag_id(int newTagId) {
        this.tag_id = newTagId;
    }

    /** @return the stored {@code taggable_id} */
    public int getTaggable_id() {
        return this.taggable_id;
    }

    /** @param newTaggableId value to store in {@code taggable_id} */
    public void setTaggable_id(int newTaggableId) {
        this.taggable_id = newTaggableId;
    }

    /** @return the stored {@code taggable_type} */
    public String getTaggable_type() {
        return this.taggable_type;
    }

    /** @param newTaggableType value to store in {@code taggable_type} */
    public void setTaggable_type(String newTaggableType) {
        this.taggable_type = newTaggableType;
    }

    /** @return the stored {@code tagger_id} */
    public int getTagger_id() {
        return this.tagger_id;
    }

    /** @param newTaggerId value to store in {@code tagger_id} */
    public void setTagger_id(int newTaggerId) {
        this.tagger_id = newTaggerId;
    }

    /** @return the stored {@code tagger_type} */
    public String getTagger_type() {
        return this.tagger_type;
    }

    /** @param newTaggerType value to store in {@code tagger_type} */
    public void setTagger_type(String newTaggerType) {
        this.tagger_type = newTaggerType;
    }

    /** @return the stored {@code context} */
    public String getContext() {
        return this.context;
    }

    /** @param newContext value to store in {@code context} */
    public void setContext(String newContext) {
        this.context = newContext;
    }

    /** @return the stored {@code created_at} */
    public String getCreated_at() {
        return this.created_at;
    }

    /** @param newCreatedAt value to store in {@code created_at} */
    public void setCreated_at(String newCreatedAt) {
        this.created_at = newCreatedAt;
    }

    /** @return the stored {@code disagree_num} */
    public int getDisagree_num() {
        return this.disagree_num;
    }

    /** @param newDisagreeNum value to store in {@code disagree_num} */
    public void setDisagree_num(int newDisagreeNum) {
        this.disagree_num = newDisagreeNum;
    }

}
|
|
@ -1,57 +0,0 @@
|
|||
package com.ossean.match.pipeline;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
|
||||
import javax.annotation.Resource;
|
||||
|
||||
import org.springframework.stereotype.Component;
|
||||
import org.springframework.transaction.annotation.Transactional;
|
||||
|
||||
import com.ossean.match.dao.AtomicItemDao;
|
||||
import com.ossean.match.model.Atom;
|
||||
|
||||
@Component
|
||||
public class AtomicItemPipeline {
|
||||
|
||||
@Resource
|
||||
private AtomicItemDao atomicItemDao;
|
||||
|
||||
// 把原子性的
|
||||
@Transactional
|
||||
public void pipelineAtoms(Map<String, ArrayList<Integer>> atoms) {
|
||||
// TODO Auto-generated method stub
|
||||
Set<String> atomNames = atoms.keySet();
|
||||
ArrayList<Integer> nums;
|
||||
for (String name : atomNames) {
|
||||
nums = atoms.get(name);
|
||||
atomicItemDao.insertAtomicItem(name, nums.get(0), nums.get(1));
|
||||
}
|
||||
}
|
||||
|
||||
@Transactional
|
||||
// 0: 未更新
|
||||
// 1: 数据更新
|
||||
// 2: 数据插入
|
||||
public void pipelineAtoms(List<Atom> atoms) {
|
||||
// TODO Auto-generated method stub
|
||||
for (Atom atom : atoms) {
|
||||
int status = atom.getStatus();
|
||||
switch (status) {
|
||||
case 0:
|
||||
break;
|
||||
case 1:
|
||||
atomicItemDao.updateAtomicItem(atom.getName(),
|
||||
atom.getNumTag(), atom.getNumInProj());
|
||||
break;
|
||||
default:
|
||||
atomicItemDao.insertAtomicItem(atom.getName(),
|
||||
atom.getNumTag(), atom.getNumInProj());
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -1,163 +0,0 @@
|
|||
package com.ossean.match.pipeline;
|
||||
|
||||
import java.util.HashSet;
|
||||
import java.util.LinkedList;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
|
||||
import javax.annotation.Resource;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.stereotype.Component;
|
||||
import org.springframework.transaction.annotation.Transactional;
|
||||
|
||||
import com.ossean.match.dao.ResultDao;
|
||||
import com.ossean.match.dao.TaggingDao;
|
||||
import com.ossean.match.model.Project;
|
||||
|
||||
@Component
|
||||
public class ResultPipeline {
|
||||
@Resource
|
||||
private ResultDao resultDao;
|
||||
@Resource
|
||||
private TaggingDao taggingDao;
|
||||
|
||||
private Logger logger = LoggerFactory.getLogger(getClass());
|
||||
|
||||
private LinkedList<Item> items1 = new LinkedList<Item>();
|
||||
private LinkedList<Item> items2 = new LinkedList<Item>();
|
||||
private LinkedList<Item> items3 = new LinkedList<Item>();
|
||||
private LinkedList<Project> projects = new LinkedList<Project>();
|
||||
|
||||
@Transactional
|
||||
public void insertResult(int prjId, String string, int tagId,
|
||||
String tagOriginalName, float score) {
|
||||
|
||||
}
|
||||
|
||||
@Transactional
|
||||
public Set<Integer> getPrjIdForMemo(int memoId) {
|
||||
List<Integer> tagIds = taggingDao.getTagIdForMemo(memoId);
|
||||
Set<Integer> result = new HashSet<Integer>();
|
||||
for (int tagId : tagIds) {
|
||||
List<Integer> tempIntegers = resultDao.getMatchPrjId(tagId);
|
||||
result.addAll(tempIntegers);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
public Set<String> getTagbyPrjId(int prjId){
|
||||
List<String> tags = resultDao.getMatchTag(prjId);
|
||||
Set<String> result = new HashSet<String>();
|
||||
for (String tag : tags) {
|
||||
// List<String> tempIntegers = resultDao.getMatchPrjId(tagId);
|
||||
result.add(tag);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
@Transactional
|
||||
public void updateRank(Project project, int flag) {
|
||||
if (flag == 1) {
|
||||
projects.add(project);
|
||||
if (projects.size() >= 10000) {
|
||||
for (Project tproject : projects) {
|
||||
System.out.println("handle project:" + tproject.getId());
|
||||
resultDao.updateRank(tproject.getRank(),
|
||||
tproject.getStar(), tproject.getId());
|
||||
}
|
||||
projects.clear();
|
||||
}
|
||||
} else {
|
||||
for (Project tproject : projects) {
|
||||
System.out.println("handle project:" + tproject.getId());
|
||||
resultDao.updateRank(tproject.getRank(), tproject.getStar(),
|
||||
tproject.getId());
|
||||
}
|
||||
projects.clear();
|
||||
}
|
||||
}
|
||||
|
||||
@Transactional
|
||||
public void insertResult1(int prjId, String prjName, int tagId,
|
||||
String tagName, float score) {
|
||||
items1.add(new Item(prjId, prjName, tagId, tagName, score));
|
||||
// 10000条输入数据库一次
|
||||
if (items1.size() >= 10000) {
|
||||
for (Item item : items1) {
|
||||
resultDao.insertResult1(item.getPrjId(), item.getPrjName(),
|
||||
item.getTagId(), item.getTagName(), item.getScore());
|
||||
}
|
||||
items1.clear();
|
||||
}
|
||||
}
|
||||
|
||||
@Transactional
|
||||
public void insertResult2(int prjId, String prjName, int tagId,
|
||||
String tagName, float score) {
|
||||
items2.add(new Item(prjId, prjName, tagId, tagName, score));
|
||||
// 10000条输入数据库一次
|
||||
if (items2.size() >= 10000) {
|
||||
for (Item item : items2) {
|
||||
resultDao.insertResult2(item.getPrjId(), item.getPrjName(),
|
||||
item.getTagId(), item.getTagName(), item.getScore());
|
||||
}
|
||||
items2.clear();
|
||||
}
|
||||
}
|
||||
|
||||
@Transactional
|
||||
public void insertResult3(int prjId, String prjName, int tagId,
|
||||
String tagName, float score) {
|
||||
items3.add(new Item(prjId, prjName, tagId, tagName, score));
|
||||
// 10000条输入数据库一次
|
||||
if (items3.size() >= 10000) {
|
||||
logger.info("match tag and project :" + prjId);
|
||||
for (Item item : items3) {
|
||||
resultDao.insertResult3(item.getPrjId(), item.getPrjName(),
|
||||
item.getTagId(), item.getTagName(), item.getScore());
|
||||
}
|
||||
items3.clear();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Package-private holder for one buffered result row: a project id and
 * name, a tag id and name, and a score. Values are set by the constructor
 * and read back through the getters; the fields themselves are
 * package-visible and not final.
 */
class Item {

    int prjId;
    String prjName;
    int tagId;
    String tagName;
    float score;

    /**
     * @param prjId   project id
     * @param prjName project name
     * @param tagId   tag id
     * @param tagName tag name
     * @param score   score stored with the row
     */
    public Item(int prjId, String prjName, int tagId, String tagName,
            float score) {
        this.prjId = prjId;
        this.prjName = prjName;
        this.tagId = tagId;
        this.tagName = tagName;
        this.score = score;
    }

    /** @return the project id given at construction */
    public int getPrjId() {
        return this.prjId;
    }

    /** @return the project name given at construction */
    public String getPrjName() {
        return this.prjName;
    }

    /** @return the tag id given at construction */
    public int getTagId() {
        return this.tagId;
    }

    /** @return the tag name given at construction */
    public String getTagName() {
        return this.tagName;
    }

    /** @return the score given at construction */
    public float getScore() {
        return this.score;
    }
}
|
|
@ -38,7 +38,7 @@
|
|||
<property name="url"
|
||||
value="jdbc:mysql://localhost:3306/ossean_production?characterEncoding=UTF-8" />
|
||||
<property name="username" value="root" />
|
||||
<property name="password" value="1234" />
|
||||
<property name="password" value="123456" />
|
||||
<property name="initialSize" value="5" />
|
||||
<property name="maxActive" value="100" />
|
||||
</bean>
|
||||
|
|
|
@ -16,4 +16,4 @@ JVM_ARGS="-Xmn98m -Xmx512m -Xms512m -XX:NewRatio=4 -XX:SurvivorRatio=4 -XX:MaxTe
|
|||
#echo JVM_ARGS=$JVM_ARGS
|
||||
#ulimit -n 400000
|
||||
#echo "" > nohup.out
|
||||
java $JVM_ARGS -classpath $CLASSPATH com.ossean.projectmanager.HotwordsMain >>log/hotwords.log 2>&1 &
|
||||
java $JVM_ARGS -classpath $CLASSPATH com.ossean.projectmanager.HotwordsMain > log/hotwords.log 2>&1 &
|
|
@ -17,4 +17,4 @@ JVM_ARGS="-Xmn98m -Xmx1024m -Xms512m -XX:NewRatio=4 -XX:SurvivorRatio=4 -XX:MaxT
|
|||
#echo JVM_ARGS=$JVM_ARGS
|
||||
#ulimit -n 400000
|
||||
#echo "" > nohup.out
|
||||
java $JVM_ARGS -classpath $CLASSPATH com.ossean.projectmanager.ProjectsFilterMain >>log/projectsfilter.log 2>&1 &
|
||||
java $JVM_ARGS -classpath $CLASSPATH com.ossean.projectmanager.ProjectsFilterMain > log/projectsfilter.log 2>&1 &
|
|
@ -39,9 +39,9 @@
|
|||
destroy-method="close">
|
||||
<property name="driverClassName" value="com.mysql.jdbc.Driver" />
|
||||
<property name="url"
|
||||
value="jdbc:mysql://172.16.128.30:3306/ossean_production?characterEncoding=UTF-8&zeroDateTimeBehavior=convertToNull&autoReconnect=true" />
|
||||
value="jdbc:mysql://172.16.128.36:3306/ossean_production?characterEncoding=UTF-8&zeroDateTimeBehavior=convertToNull&autoReconnect=true" />
|
||||
<property name="username" value="gather" />
|
||||
<property name="password" value="influx1234" />
|
||||
<property name="password" value="Influx@1234" />
|
||||
<property name="validationQuery" value="SELECT 1" />
|
||||
<property name="testOnBorrow" value="true"/>
|
||||
</bean>
|
||||
|
|
|
@ -17,4 +17,4 @@ JVM_ARGS="-Xmn98m -Xmx1024m -Xms512m -XX:NewRatio=4 -XX:SurvivorRatio=4 -XX:MaxT
|
|||
#echo JVM_ARGS=$JVM_ARGS
|
||||
#ulimit -n 400000
|
||||
#echo "" > nohup.out
|
||||
java $JVM_ARGS -classpath $CLASSPATH com.ossean.projectmanager.SyncMatchResultMain >>log/syncmatchresult.log 2>&1 &
|
||||
java $JVM_ARGS -classpath $CLASSPATH com.ossean.projectmanager.SyncMatchResultMain > log/syncmatchresult.log 2>&1 &
|
|
@ -17,4 +17,4 @@ JVM_ARGS="-Xmn98m -Xmx512m -Xms512m -XX:NewRatio=4 -XX:SurvivorRatio=4 -XX:MaxTe
|
|||
#echo JVM_ARGS=$JVM_ARGS
|
||||
#ulimit -n 400000
|
||||
#echo "" > nohup.out
|
||||
java $JVM_ARGS -classpath $CLASSPATH com.ossean.projectmanager.UpdateOspTagsMain >>log/updateosptags.log 2>&1 &
|
||||
java $JVM_ARGS -classpath $CLASSPATH com.ossean.projectmanager.UpdateOspTagsMain > log/updateosptags.log 2>&1 &
|
|
@ -16,7 +16,8 @@ public class UpdateOspTagsMain {
|
|||
private UpdateOspTags updateTags;
|
||||
|
||||
public void start(){
|
||||
updateTags.updateOspTags();
|
||||
// updateTags.updateOspTags();
|
||||
updateTags.restorePrjTaggings();
|
||||
}
|
||||
|
||||
public static void main(String[] args){
|
||||
|
|
|
@ -13,6 +13,7 @@ import com.ossean.projectmanager.lasttabledao.TagDao;
|
|||
import com.ossean.projectmanager.lasttabledao.TaggingsDao;
|
||||
import com.ossean.projectmanager.model.OpenSourceProject;
|
||||
import com.ossean.projectmanager.model.Taggings;
|
||||
import com.ossean.projectmanager.utils.DataHandler;
|
||||
|
||||
/**
|
||||
*
|
||||
|
@ -28,6 +29,9 @@ public class UpdateOspTags {
|
|||
private TagDao tagDao;
|
||||
@Resource
|
||||
private TaggingsDao taggingsDao;
|
||||
|
||||
private String taggingsTableName = "taggings";
|
||||
private String tagsTableName = "tags";
|
||||
|
||||
/**
|
||||
* 根据taggings更新项目标签字段tags和权重更高的标签字段tags_for_search
|
||||
|
@ -67,5 +71,50 @@ public class UpdateOspTags {
|
|||
}
|
||||
|
||||
}
|
||||
|
||||
public void restorePrjTaggings(){
|
||||
logger.info("start updateOspTags ......");
|
||||
int start = 0;
|
||||
int prjId = 0;
|
||||
while (start < 2000000) {
|
||||
List<OpenSourceProject> ospList = ospDao.getProjectsByBatch(start,
|
||||
5000);
|
||||
for (OpenSourceProject osp : ospList) {
|
||||
prjId = osp.getId();
|
||||
// 将tag和项目的关系存入表item_tag_relation 并分离tag
|
||||
String tags = osp.getTags();
|
||||
if (tags == null) {
|
||||
// 表示该项目没有标签
|
||||
continue;
|
||||
}
|
||||
List<String> tagList = DataHandler
|
||||
.tagsSegmentation(tags);
|
||||
for (String tag : tagList) {
|
||||
ospDao.insertTag(tagsTableName, tag);// ignore方式插入该项目的标签
|
||||
int tag_id = ospDao.selectTagIdByName(
|
||||
tagsTableName, tag);
|
||||
Taggings taggings = new Taggings();
|
||||
taggings.setTag_id(tag_id);
|
||||
taggings.setTaggable_id(prjId);
|
||||
taggings.setTaggable_type("OpenSourceProject");
|
||||
taggings.setContext("tags");
|
||||
taggings.setCreated_at(DataHandler.getNow());
|
||||
// 将Taggings对象存入数据库中
|
||||
try {
|
||||
ospDao.insertTaggings(taggingsTableName,
|
||||
taggings);
|
||||
} catch (Exception e) {
|
||||
// 在插入记录之前 relative_memos表中的记录已经被删除掉了
|
||||
logger.error("insert taggins error: " + e);
|
||||
System.exit(0);
|
||||
}
|
||||
}
|
||||
logger.info("currentPrjId: " + prjId);
|
||||
}
|
||||
logger.info("last prj batch end, currentPrjId: " + prjId);
|
||||
start = prjId + 5000;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
@ -2,15 +2,32 @@ package com.ossean.projectmanager.lasttabledao;
|
|||
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.ibatis.annotations.Insert;
|
||||
import org.apache.ibatis.annotations.Param;
|
||||
import org.apache.ibatis.annotations.Select;
|
||||
import org.apache.ibatis.annotations.Update;
|
||||
|
||||
import com.ossean.projectmanager.model.OpenSourceProject;
|
||||
import com.ossean.projectmanager.model.Taggings;
|
||||
|
||||
public interface OpenSourceProjectDao {
|
||||
|
||||
//获得最大项目id
|
||||
|
||||
// 向tag表存储数据
|
||||
@Insert("insert ignore into ${table} (name) values (#{name})")
|
||||
public void insertTag(@Param("table") String table,
|
||||
@Param("name") String name);
|
||||
|
||||
// 根据tag的name找tag的id
|
||||
@Select("select id from ${table} where name=#{name}")
|
||||
public int selectTagIdByName(@Param("table") String table,
|
||||
@Param("name") String name);
|
||||
|
||||
// 插入Taggings对象
|
||||
@Insert("insert ignore into ${table} (tag_id,taggable_id,taggable_type,context,created_at,disagree_num) values (#{model.tag_id},#{model.taggable_id},#{model.taggable_type},#{model.context},#{model.created_at},#{model.disagree_num})")
|
||||
public void insertTaggings(@Param("table") String table,
|
||||
@Param("model") Taggings model);
|
||||
|
||||
// 获得最大项目id
|
||||
@Select("select MAX(id) from open_source_projects")
|
||||
public Integer getNewLast();
|
||||
|
||||
|
@ -19,10 +36,9 @@ public interface OpenSourceProjectDao {
|
|||
public List<OpenSourceProject> getProjectsByBatch(
|
||||
@Param("start") int start, @Param("size") int size);
|
||||
|
||||
//更新项目标签字段tags和权重更高的标签字段tags_for_search
|
||||
// 更新项目标签字段tags和权重更高的标签字段tags_for_search
|
||||
@Update("update open_source_projects set tags=#{tags}, tags_for_search = #{tagsForSearch} where id=#{id}")
|
||||
public void updatePrjTags(@Param("id") int id,
|
||||
@Param("tags") String tags,
|
||||
public void updatePrjTags(@Param("id") int id, @Param("tags") String tags,
|
||||
@Param("tagsForSearch") String tagsForSearch);
|
||||
|
||||
// 对项目标签属性进行更新
|
||||
|
|
|
@ -68,9 +68,9 @@ public class ProjectsFilter {
|
|||
.getOpenHubPrjByUrl(url); // 根据url从openhub的项目分表获得项目信息
|
||||
if (openhubProject != null
|
||||
&& openhubProject.getName() != null
|
||||
&& openhubProject.getName() != ""
|
||||
&& !"".equals(openhubProject.getName())
|
||||
&& openhubProject.getDescription() != null
|
||||
&& openhubProject.getDescription() != ""
|
||||
&& !"".equals(openhubProject.getDescription())
|
||||
&& openhubProject.getCodeRepository() != null
|
||||
&& !openhubProject.getCodeRepository().contains(
|
||||
"add a code location")) { // openhub的筛选条件为name、description不为空,且该项目有版本库
|
||||
|
@ -80,10 +80,11 @@ public class ProjectsFilter {
|
|||
matchResultDao.deleteMatchResult(
|
||||
getTargetTable(project.getId()),
|
||||
project.getId()); // 删除该项目的匹配结果,确保无之前的匹配结果
|
||||
} else {
|
||||
lastProjectDao.updateFiltratedPrj(project.getId(),
|
||||
2); // 筛选标识由1或2变为2,表示该项目之前就是筛选作为保留的
|
||||
}
|
||||
}
|
||||
// else {
|
||||
// lastProjectDao.updateFiltratedPrj(project.getId(),
|
||||
// 2); // 筛选标识由1或2变为2,表示该项目之前就是筛选作为保留的
|
||||
// }
|
||||
} else {
|
||||
lastProjectDao.updateFiltratedPrj(project.getId(), 0); // 筛选标识变为0,表示该项目不保留
|
||||
matchResultDao.deleteMatchResult(
|
||||
|
@ -95,9 +96,9 @@ public class ProjectsFilter {
|
|||
.getSourceForgePrjByUrl(url); // 根据url从SourceForge的项目分表获得项目信息
|
||||
if (sourceforgeProject != null
|
||||
&& sourceforgeProject.getName() != null
|
||||
&& sourceforgeProject.getName() != ""
|
||||
&& !"".equals(sourceforgeProject.getName())
|
||||
&& sourceforgeProject.getDescription() != null
|
||||
&& sourceforgeProject.getDescription() != ""
|
||||
&& !"".equals(sourceforgeProject.getDescription())
|
||||
&& ((sourceforgeProject.getDownload_num() > 0) || (sourceforgeProject
|
||||
.getStars() > 0))) {
|
||||
if (project.getFilration() == 0) {
|
||||
|
@ -106,10 +107,11 @@ public class ProjectsFilter {
|
|||
matchResultDao.deleteMatchResult(
|
||||
getTargetTable(project.getId()),
|
||||
project.getId()); // 删除该项目的匹配结果,确保无之前的匹配结果
|
||||
} else {
|
||||
lastProjectDao.updateFiltratedPrj(project.getId(),
|
||||
2); // 筛选标识由1或2变为2,表示该项目之前就是筛选作为保留的
|
||||
}
|
||||
}
|
||||
// else {
|
||||
// lastProjectDao.updateFiltratedPrj(project.getId(),
|
||||
// 2); // 筛选标识由1或2变为2,表示该项目之前就是筛选作为保留的
|
||||
// }
|
||||
} else {
|
||||
lastProjectDao.updateFiltratedPrj(project.getId(), 0); // 筛选标识变为0,表示该项目不保留
|
||||
matchResultDao.deleteMatchResult(
|
||||
|
@ -122,9 +124,10 @@ public class ProjectsFilter {
|
|||
matchResultDao.deleteMatchResult(
|
||||
getTargetTable(project.getId()),
|
||||
project.getId()); // 删除该项目的匹配结果,确保无之前的匹配结果
|
||||
} else {
|
||||
lastProjectDao.updateFiltratedPrj(project.getId(), 2); // 筛选标识由1或2变为2,表示该项目之前就是筛选作为保留的
|
||||
}
|
||||
}
|
||||
// else {
|
||||
// lastProjectDao.updateFiltratedPrj(project.getId(), 2); // 筛选标识由1或2变为2,表示该项目之前就是筛选作为保留的
|
||||
// }
|
||||
} else {
|
||||
logger.info("Unknown source... source = " + source);
|
||||
}
|
||||
|
|
|
@ -0,0 +1,38 @@
|
|||
package com.ossean.projectmanager.utils;
|
||||
|
||||
import java.text.SimpleDateFormat;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Date;
|
||||
import java.util.List;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
public class DataHandler {
|
||||
|
||||
// 获取当前时间的String
|
||||
public static String getNow() {
|
||||
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
|
||||
String result = sdf.format(new Date());
|
||||
return result;
|
||||
}
|
||||
|
||||
// 标签分离函数
|
||||
public static List<String> tagsSegmentation(String tags) {
|
||||
List<String> tag = new ArrayList<String>();
|
||||
|
||||
if (tags != null) {
|
||||
String regex = "<[^<>]*>";
|
||||
Pattern pattern = Pattern.compile(regex);
|
||||
Matcher matcher = pattern.matcher(tags);
|
||||
|
||||
while (matcher.find()) {
|
||||
String t = matcher.group();
|
||||
t = t.substring(1, t.length() - 1);
|
||||
|
||||
tag.add(t);
|
||||
}
|
||||
}
|
||||
return tag;
|
||||
}
|
||||
|
||||
}
|
|
@ -19,9 +19,9 @@
|
|||
destroy-method="close">
|
||||
<property name="driverClassName" value="com.mysql.jdbc.Driver" />
|
||||
<property name="url"
|
||||
value="jdbc:mysql://172.16.128.33:3306/ossean_production?characterEncoding=UTF-8&zeroDateTimeBehavior=convertToNull&autoReconnect=true" />
|
||||
value="jdbc:mysql://localhost:3306/ossean_production?characterEncoding=UTF-8&zeroDateTimeBehavior=convertToNull&autoReconnect=true" />
|
||||
<property name="username" value="root" />
|
||||
<property name="password" value="NUDTpdl@" />
|
||||
<property name="password" value="NUDTpdl@123" />
|
||||
<property name="validationQuery" value="SELECT 1" />
|
||||
<property name="testOnBorrow" value="true"/>
|
||||
</bean>
|
||||
|
|
|
@ -46,7 +46,7 @@
|
|||
<dependency>
|
||||
<groupId>junit</groupId>
|
||||
<artifactId>junit</artifactId>
|
||||
<version>3.8.1</version>
|
||||
<version>4.10</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
|
|
|
@ -16,15 +16,35 @@ public class ClearTable {
|
|||
|
||||
Logger logger = Logger.getLogger(ClearTable.class);
|
||||
public void start(){
|
||||
truncateTable("edd_pointers");
|
||||
truncateTable("synonyms");
|
||||
truncateTable("edd_relations");
|
||||
truncateTable("synonymmings");
|
||||
truncateTable("open_source_projects");
|
||||
deleteTaggings4Project();
|
||||
deleteItemInEddPointers("gather_projects", "edd_relations");
|
||||
//deleteTaggings4Project();
|
||||
|
||||
logger.info("完成去重程序运行前数据表的清空和taggings表OpenSourceProject的删除");
|
||||
}
|
||||
|
||||
//删除edd_pointers表中transfer对应的数据
|
||||
public void deleteItemInEddPointers(String sourceTableName, String targetTableName){
|
||||
String sql = "delete from edd_pointers where SourceTableName=? and TargetTableName=?";
|
||||
Connection conn = getConnection();
|
||||
if(conn == null){
|
||||
logger.info("没有获取到Connection");
|
||||
System.exit(-1);
|
||||
}
|
||||
try {
|
||||
PreparedStatement ps = conn.prepareStatement(sql);
|
||||
ps.setString(1, sourceTableName);
|
||||
ps.setString(2, targetTableName);
|
||||
ps.execute();
|
||||
ps.close();
|
||||
conn.close();
|
||||
} catch (SQLException e) {
|
||||
// TODO Auto-generated catch block
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
public void truncateTable(String table){
|
||||
String sql = "truncate table " + table;
|
||||
Connection conn = getConnection();
|
||||
|
@ -65,7 +85,7 @@ public class ClearTable {
|
|||
Connection conn = null;
|
||||
try {
|
||||
Class.forName("com.mysql.jdbc.Driver").newInstance();
|
||||
conn = DriverManager.getConnection("jdbc:mysql://172.16.128.30:3306/ossean", "root", "123456");
|
||||
conn = DriverManager.getConnection("jdbc:mysql://localhost:3306/ossean_production", "root", "NUDTpdl@123");
|
||||
} catch (InstantiationException e) {
|
||||
e.printStackTrace();
|
||||
} catch (IllegalAccessException e) {
|
||||
|
|
|
@ -1,98 +1,113 @@
|
|||
package com.ossean;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import javax.annotation.Resource;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.beans.factory.annotation.Qualifier;
|
||||
import org.springframework.context.ApplicationContext;
|
||||
import org.springframework.context.support.ClassPathXmlApplicationContext;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import com.ossean.databaseSource.DBSource;
|
||||
import com.ossean.databaseSource.GatherDao;
|
||||
import com.ossean.databaseSource.UpdateControlProjectsDao;
|
||||
import com.ossean.model.GatherProjectsModel;
|
||||
import com.ossean.util.MergeProjectNew;
|
||||
import com.ossean.util.MergeProjectsUtil;
|
||||
|
||||
@Component
|
||||
public class MergeProjects {
|
||||
Logger logger = Logger.getLogger(this.getClass());
|
||||
@Resource
|
||||
private DBSource dbSource;
|
||||
@Resource
|
||||
private GatherDao gatherDao;
|
||||
@Resource
|
||||
private UpdateControlProjectsDao updateControlDao;
|
||||
|
||||
@Qualifier("mergeProjectNew")
|
||||
@Autowired
|
||||
private MergeProjectNew mergeProjectNew;
|
||||
|
||||
private int startId1;
|
||||
|
||||
private static String pointerTableName = "edd_pointers";
|
||||
private static String sourceTableName = "gather_projects";
|
||||
private static String targetTableName = "edd_relations";
|
||||
|
||||
private static int batchSize = 500;
|
||||
|
||||
//读指针
|
||||
public int readPointer(String table, String source, String target, int minId){
|
||||
int pointer = minId;
|
||||
try {
|
||||
pointer = dbSource.getPointer(table, source, target);
|
||||
} catch(Exception e) {
|
||||
logger.info("No such pointer! Create one");
|
||||
dbSource.insertPointer(table, source, target, pointer);
|
||||
}
|
||||
return pointer;
|
||||
}
|
||||
|
||||
public void start(){
|
||||
int minId = 1;
|
||||
int maxId = dbSource.getMaxId(sourceTableName);
|
||||
|
||||
while(true){
|
||||
startId1 = readPointer(pointerTableName, sourceTableName, targetTableName, minId);
|
||||
if(startId1 > maxId){
|
||||
logger.info("finish program! HAHAHA");
|
||||
System.exit(0);
|
||||
}
|
||||
List<GatherProjectsModel> gpmList1 = gatherDao.selectGPMList(sourceTableName, startId1, batchSize, maxId);
|
||||
if(gpmList1.size() == 0){
|
||||
try {
|
||||
logger.info("no increase item to be handled! handle update items");
|
||||
List<GatherProjectsModel> updateProjectList = gatherDao.selectUpdateProjects(sourceTableName);
|
||||
for(GatherProjectsModel model:updateProjectList){
|
||||
mergeProjectNew.handleNewProject(model, true);
|
||||
gatherDao.updateUpdateMark(sourceTableName, null, model.getId());
|
||||
}
|
||||
logger.info("all update items have been solved. Sleep 1h");
|
||||
Thread.sleep(3600*1000);
|
||||
continue;
|
||||
} catch (InterruptedException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
for(GatherProjectsModel model1:gpmList1){
|
||||
logger.info("handling project : " + model1.getId());
|
||||
mergeProjectNew.handleNewProject(model1, false);
|
||||
|
||||
//如果是对更新表中的数据进行操作 则不更新指针
|
||||
dbSource.updatePointer(pointerTableName, sourceTableName, targetTableName, model1.getId() + 1);
|
||||
gatherDao.updateUpdateMark(sourceTableName, null, model1.getId()); //需要更新update_mark字段 因为按顺序取的时候可能取到1的
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public static void main(String[] args){
|
||||
ApplicationContext applicationContext = new ClassPathXmlApplicationContext("classpath:/applicationContext*.xml");
|
||||
MergeProjects Main = applicationContext.getBean(MergeProjects.class);
|
||||
Main.start();
|
||||
}
|
||||
}
|
||||
package com.ossean;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import javax.annotation.Resource;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.beans.factory.annotation.Qualifier;
|
||||
import org.springframework.context.ApplicationContext;
|
||||
import org.springframework.context.support.ClassPathXmlApplicationContext;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import com.ossean.databaseSource.DBSource;
|
||||
import com.ossean.databaseSource.GatherDao;
|
||||
import com.ossean.databaseSource.UpdateControlProjectsDao;
|
||||
import com.ossean.model.GatherProjectsModel;
|
||||
import com.ossean.util.MergeProjectNew;
|
||||
import com.ossean.util.MergeProjectsUtil;
|
||||
|
||||
@Component
|
||||
public class MergeProjects {
|
||||
Logger logger = Logger.getLogger(this.getClass());
|
||||
@Resource
|
||||
private DBSource dbSource;
|
||||
@Resource
|
||||
private GatherDao gatherDao;
|
||||
@Resource
|
||||
private UpdateControlProjectsDao updateControlDao;
|
||||
|
||||
@Qualifier("mergeProjectNew")
|
||||
@Autowired
|
||||
private MergeProjectNew mergeProjectNew;
|
||||
|
||||
private int startId1;
|
||||
|
||||
private static String pointerTableName = "edd_pointers";
|
||||
private static String sourceTableName = "gather_projects";
|
||||
private static String targetTableName = "edd_relations";
|
||||
|
||||
private static int batchSize = 500;
|
||||
|
||||
//读指针
|
||||
public int readPointer(String table, String source, String target, int minId){
|
||||
int pointer = minId;
|
||||
try {
|
||||
pointer = dbSource.getPointer(table, source, target);
|
||||
} catch(Exception e) {
|
||||
logger.info("No such pointer! Create one");
|
||||
dbSource.insertPointer(table, source, target, pointer);
|
||||
}
|
||||
return pointer;
|
||||
}
|
||||
|
||||
public void start(){
|
||||
//int minId = 1;
|
||||
//int maxId = dbSource.getMaxId(sourceTableName);
|
||||
int count=0;
|
||||
count = readPointer(pointerTableName, sourceTableName, targetTableName, count);
|
||||
|
||||
while(true){
|
||||
//startId1 = readPointer(pointerTableName, sourceTableName, targetTableName, minId);
|
||||
// if(startId1 > maxId){
|
||||
//
|
||||
// try {
|
||||
// logger.info("没有数据了,休眠10min");
|
||||
// Thread.sleep(600*1000L);
|
||||
// continue;
|
||||
// } catch (InterruptedException e) {
|
||||
// e.printStackTrace();
|
||||
// }
|
||||
// }
|
||||
List<GatherProjectsModel> gpmList1 = gatherDao.selectGPMList(sourceTableName,batchSize);
|
||||
|
||||
if(gpmList1.size() == 0){
|
||||
try {
|
||||
logger.info("no increase item to be handled! handle update items");
|
||||
List<GatherProjectsModel> updateProjectList = gatherDao.selectUpdateProjects(sourceTableName,batchSize);
|
||||
if(updateProjectList.size() == 0){
|
||||
logger.info("all update items have been solved. Sleep 10 mins");
|
||||
Thread.sleep(600*1000);
|
||||
continue;
|
||||
}
|
||||
else
|
||||
for(GatherProjectsModel model:updateProjectList){
|
||||
mergeProjectNew.handleNewProject(model, true);
|
||||
gatherDao.updateUpdateMark(sourceTableName, 1, model.getId());
|
||||
}
|
||||
|
||||
} catch (InterruptedException e) {
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
long start = System.currentTimeMillis();
|
||||
for(GatherProjectsModel model1:gpmList1){
|
||||
logger.info("handling project : " + model1.getId());
|
||||
mergeProjectNew.handleNewProject(model1, false);
|
||||
count = count+1;
|
||||
dbSource.updatePointer(pointerTableName, sourceTableName, targetTableName, count);
|
||||
gatherDao.updateUpdateMark(sourceTableName, 1, model1.getId()); //需要更新update_mark字段 因为按顺序取的时候可能取到1的
|
||||
}
|
||||
long end = System.currentTimeMillis();
|
||||
logger.warn("deal with: 500 projects cost: "+(float)(end - start)/60000+" minutes");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
public static void main(String[] args){
|
||||
ApplicationContext applicationContext = new ClassPathXmlApplicationContext("classpath:/applicationContext*.xml");
|
||||
MergeProjects Main = applicationContext.getBean(MergeProjects.class);
|
||||
Main.start();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -73,7 +73,7 @@ public class MergeProjectsOld {
|
|||
}else{}
|
||||
while(true){
|
||||
startId1 = readPointer(pointerTableName, sourceTableName, edd_relations_pointer, minId);
|
||||
List<GatherProjectsModel> gpmList1 = gatherDao.selectGPMList(sourceTableName, startId1, batchSize, maxId);
|
||||
List<GatherProjectsModel> gpmList1 = gatherDao.selectGPMList(sourceTableName, batchSize);
|
||||
if(gpmList1.size() == 0){
|
||||
try {
|
||||
logger.info("no increase item to be handled! handle update items");
|
||||
|
@ -92,7 +92,7 @@ public class MergeProjectsOld {
|
|||
List<GatherProjectsModel> updateProjectList = gatherDao.selectUpdateProjects(sourceTableName);
|
||||
for(GatherProjectsModel model:updateProjectList){
|
||||
mergeProjectNew.handleNewProject(model, true);
|
||||
gatherDao.updateUpdateMark(sourceTableName, null, model.getId());
|
||||
gatherDao.updateUpdateMark(sourceTableName, 1, model.getId());
|
||||
}
|
||||
logger.info("all update items have been solved. Sleep 1h");
|
||||
Thread.sleep(3600*1000);
|
||||
|
@ -110,7 +110,7 @@ public class MergeProjectsOld {
|
|||
|
||||
//如果是对更新表中的数据进行操作 则不更新指针
|
||||
dbSource.updatePointer(pointerTableName, sourceTableName, edd_relations_pointer, model1.getId() + 1);
|
||||
gatherDao.updateUpdateMark(sourceTableName, null, model1.getId()); //需要更新update_mark字段 因为按顺序取的时候可能取到1的
|
||||
gatherDao.updateUpdateMark(sourceTableName, 1, model1.getId()); //需要更新update_mark字段 因为按顺序取的时候可能取到1的
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -26,13 +26,13 @@ public interface DBDest {
|
|||
*/
|
||||
//向open_source_projects表中插入对象数据
|
||||
@Insert("insert into ${table} (id,name,description,"
|
||||
+ "url,url_md5,language,category,"
|
||||
+ "url,language,category,"
|
||||
+ "source,created_time,updated_time,"
|
||||
+ "tags,tags_for_search,synonyms,license,homepage,extracted_time,update_mark) values (#{model.id},#{model.name},#{model.description},"
|
||||
+ "#{model.url},#{model.url_md5},#{model.language},"
|
||||
+ "tags,tags_for_search,synonyms,update_mark) values (#{model.id},#{model.name},#{model.description},"
|
||||
+ "#{model.url},,#{model.language},"
|
||||
+ "#{model.category},#{model.source},"
|
||||
+ "#{model.created_time},#{model.updated_time},#{model.tags},"
|
||||
+ "#{model.tags_for_search},#{model.synonyms},#{model.license},#{model.homepage},#{model.extracted_time},#{model.update_mark})")
|
||||
+ "#{model.tags_for_search},#{model.synonyms},#{model.update_mark})")
|
||||
public void insertOsp(@Param("table") String table, @Param("model") OpenSourceProject model);
|
||||
|
||||
//查找刚刚插入open_source_projects表中的记录id
|
||||
|
|
|
@ -87,8 +87,10 @@ public interface GatherDao {
|
|||
|
||||
|
||||
//根据id和limit取GatherProjectsModel列表
|
||||
@Select("select * from ${table} where id>=#{start} and id<=#{maxId} order by id asc limit #{size}")
|
||||
public List<GatherProjectsModel> selectGPMList(@Param("table") String table, @Param("start") int start, @Param("size") int size, @Param("maxId") int maxId);
|
||||
//@Select("select * from ${table} where id>=#{start} and id<=#{maxId} order by id asc limit #{size}")
|
||||
//public List<GatherProjectsModel> selectGPMList(@Param("table") String table, @Param("start") int start, @Param("size") int size, @Param("maxId") int maxId);
|
||||
@Select("select * from ${table} where update_mark = 0 limit #{size}")
|
||||
public List<GatherProjectsModel> selectGPMList(@Param("table") String table, @Param("size") int size);
|
||||
|
||||
@Select("select * from ${table} where id>=#{start} and id<#{end} limit #{size}")
|
||||
public List<GatherProjectsModel> selectGPMListBySize(@Param("table") String table, @Param("start") int start, @Param("end") int end, @Param("size") int size);
|
||||
|
@ -106,12 +108,12 @@ public interface GatherDao {
|
|||
public List<GatherProjectsModel> selectGPMBySameName(@Param("table") String table, @Param("name") String name, @Param("id") int id);
|
||||
|
||||
|
||||
//取出所有update_mark为1的更新数据
|
||||
@Select("select * from ${table} where update_mark=1")
|
||||
public List<GatherProjectsModel> selectUpdateProjects(@Param("table") String table);
|
||||
//取出所有update_mark为2的更新数据
|
||||
@Select("select * from ${table} where update_mark=2 limit #{size}")
|
||||
public List<GatherProjectsModel> selectUpdateProjects(@Param("table") String table,@Param("size") int size);
|
||||
|
||||
//更新update_mark字段
|
||||
@Select("update ${table} set update_mark=#{update_mark} where id=#{id}")
|
||||
public void updateUpdateMark(@Param("table") String table, @Param("update_mark") String update_mark, @Param("id") int id);
|
||||
public void updateUpdateMark(@Param("table") String table, @Param("update_mark") int update_mark, @Param("id") int id);
|
||||
|
||||
}
|
|
@ -45,7 +45,7 @@ public class StringHandler {
|
|||
}
|
||||
}
|
||||
resultStrArr = tmp.toArray(new String[0]);
|
||||
System.out.println("bangbangbang"+resultStrArr[0]+" "+resultStrArr[1]);
|
||||
//System.out.println("bangbangbang"+resultStrArr[0]+" "+resultStrArr[1]);
|
||||
return resultStrArr;
|
||||
}
|
||||
|
||||
|
|
|
@ -23,6 +23,16 @@
|
|||
<param name="ConversionPattern" value="%d{yy-MM-dd HH:mm:ss,SSS} %-5p %c(%F:%L) ## %m%n" />
|
||||
</layout>
|
||||
</appender>
|
||||
|
||||
<appender name="file_log1" class="org.apache.log4j.DailyRollingFileAppender">
|
||||
<param name="File" value="./log/time_cost.txt" />
|
||||
<param name="threshold" value="WARN" />
|
||||
<layout class="org.apache.log4j.PatternLayout">
|
||||
<param name="ConversionPattern" value="%d{yy-MM-dd HH:mm:ss,SSS} %-5p %c(%F:%L) ## %m%n" />
|
||||
</layout>
|
||||
</appender>
|
||||
|
||||
|
||||
|
||||
|
||||
<!-- 邮件(只有ERROR时才会发送!) -->
|
||||
|
@ -47,13 +57,13 @@
|
|||
<level value="warn" />
|
||||
<appender-ref ref="stdout" />
|
||||
</logger>
|
||||
|
||||
|
||||
|
||||
<root>
|
||||
<level value="info" />
|
||||
<appender-ref ref="stdout" />
|
||||
<appender-ref ref="file" />
|
||||
<appender-ref ref="file_log" />
|
||||
<appender-ref ref="file_log1" />
|
||||
</root>
|
||||
|
||||
</log4j:configuration>
|
|
@ -1,126 +0,0 @@
|
|||
package foo;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import javax.annotation.Resource;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
import org.junit.Test;
|
||||
import org.junit.runner.RunWith;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.beans.factory.annotation.Qualifier;
|
||||
import org.springframework.context.ApplicationContext;
|
||||
import org.springframework.test.context.ContextConfiguration;
|
||||
import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;
|
||||
|
||||
import com.ossean.databaseSource.DBSource;
|
||||
import com.ossean.databaseSource.GatherDao;
|
||||
import com.ossean.model.GatherProjectsModel;
|
||||
import com.ossean.util.MergeProjectNew;
|
||||
import com.ossean.util.MergeProjectsUtil;
|
||||
import com.ossean.util.RegexHandler;
|
||||
import com.ossean.util.StringHandler_ProjectName;
|
||||
|
||||
@RunWith(SpringJUnit4ClassRunner.class)
|
||||
@ContextConfiguration(locations = {"classpath*:/applicationContext*.xml"})
|
||||
public class Testcase {
|
||||
|
||||
@Autowired
|
||||
ApplicationContext ctx;
|
||||
|
||||
Logger logger = Logger.getLogger(this.getClass());
|
||||
@Resource
|
||||
private DBSource dbSource;
|
||||
@Resource
|
||||
private GatherDao gatherDao;
|
||||
|
||||
@Qualifier("mergeProjectsUtil")
|
||||
@Autowired
|
||||
private MergeProjectsUtil mergeProjectsUtil;
|
||||
|
||||
@Qualifier("mergeProjectNew")
|
||||
@Autowired
|
||||
private MergeProjectNew mergeProjectNew;
|
||||
|
||||
|
||||
public static void main(String[] args) {
|
||||
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testGetSynonyms(){
|
||||
List<GatherProjectsModel> prj_list = gatherDao.selectGPMList("gather_projects_test", 11000, 100, 1135058);
|
||||
GatherProjectsModel gpm = gatherDao.selectGPMById("gather_projects", 11085);//1220,1995,1194,11085
|
||||
GatherProjectsModel gpm2 = gatherDao.selectGPMById("gather_projects", 237094);
|
||||
List<String> list= mergeProjectNew.getSynonyms(gpm);
|
||||
String str = "";
|
||||
for (int j = 0;j<list.size();j++){
|
||||
str =str+list.get(j)+",";
|
||||
}
|
||||
System.out.println("prj id:"+gpm.getId()+"; prj name:"+gpm.getName()+"; synonyms:"+str);
|
||||
// for(int i=0;i<prj_list.size();i++){
|
||||
// List<String> list= mergeProjectNew.getSynonyms(prj_list.get(i));
|
||||
//
|
||||
// String str = "";
|
||||
// for (int j = 0;j<list.size();j++){
|
||||
// str =str+list.get(j)+",";
|
||||
// }
|
||||
// System.out.println("prj id:"+prj_list.get(i).getId()+"; prj name:"+prj_list.get(i).getName()+"; synonyms:"+str);
|
||||
// }
|
||||
|
||||
|
||||
// String a = "abc-hu";
|
||||
// System.out.println(RegexHandler.extractEngDecChiAndDot(a));
|
||||
|
||||
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCalSimilarityForTag(){
|
||||
GatherProjectsModel gpm_1 = gatherDao.selectGPMById("gather_projects", 87361);
|
||||
GatherProjectsModel gpm_2 = gatherDao.selectGPMById("gather_projects", 64);
|
||||
//GatherProjectsModel gpm_3 = gatherDao.selectGPMById("gather_projects", 99700);87361
|
||||
// 770684
|
||||
// 907638
|
||||
// 1032900
|
||||
// 1032911
|
||||
//boolean isSame = mergeProjectNew.isTheSameAfterTFIDF(gpm_1, gpm_2, false);
|
||||
// double similarity = mergeProjectNew.calSimilarityByDescription(gpm_1, gpm_2);
|
||||
// System.out.println("calSimilarityByDesc"+String.valueOf(similarity));
|
||||
// double similarity2 = mergeProjectNew.calSimilarityByTag(gpm_1, gpm_2);
|
||||
// System.out.println("calSimilarityByTag"+String.valueOf(similarity2));
|
||||
mergeProjectNew.handleNewProject(gpm_1, false);
|
||||
//System.out.println("the two model is same: "+isSame);
|
||||
|
||||
// List<String> synonymList = mergeProjectsUtil.getSynonyms(gpm_2);
|
||||
// for (int i =0;i<synonymList.size();i++)
|
||||
// System.out.println(synonymList.get(i));
|
||||
}
|
||||
|
||||
// 标签分离函数
|
||||
public static List<String> tagsSegmentation(String tags) {
|
||||
List<String> tag = new ArrayList<String>();
|
||||
|
||||
if (tags != null) {
|
||||
String regex = "<[^<>]*>";
|
||||
//一个Pattern是一个正则表达式经编译后的表现模式。
|
||||
Pattern pattern = Pattern.compile(regex);
|
||||
//一个Matcher对象是一个状态机器,它依据Pattern对象做为匹配模式对字符串展开匹配检查。
|
||||
Matcher matcher = pattern.matcher(tags);
|
||||
|
||||
while (matcher.find()) {
|
||||
String t = matcher.group();
|
||||
t = t.substring(1, t.length() - 1);
|
||||
|
||||
tag.add(t);
|
||||
}
|
||||
}
|
||||
return tag;
|
||||
}
|
||||
|
||||
}
|
|
@ -1,5 +1,10 @@
|
|||
source 'http://ruby.taobao.org/'
|
||||
<<<<<<< HEAD
|
||||
#source 'http://ruby.taobao.org/'
|
||||
source 'https://rubygems.org'
|
||||
=======
|
||||
source 'https://ruby.taobao.org/'
|
||||
#source 'https://rubygems.org'
|
||||
>>>>>>> tmp
|
||||
|
||||
unless RUBY_PLATFORM =~ /w32/
|
||||
# unix-like only
|
||||
|
|
|
@ -60,11 +60,11 @@ class OpenSourceProjectsController < ApplicationController
|
|||
|
||||
#@open_source_project.update_column(:view_num_ossean, (@open_source_project.view_num_ossean.to_i+1))
|
||||
#@open_source_project.save
|
||||
@pops=@open_source_project.open_source_project_popularities.where("year_col >= 2013").order("year_col ASC, month_col ASC")
|
||||
@hot_words=@open_source_project.hot_words
|
||||
# @pops=@open_source_project.open_source_project_popularities.where("year_col >= 2013").order("year_col ASC, month_col ASC")
|
||||
# @hot_words=@open_source_project.hot_words
|
||||
@memo = RelativeMemo.new(:open_source_project => @open_source_project)
|
||||
@topics_count = @open_source_project.relative_memos_num
|
||||
@bugs = @open_source_project.relative_bugs.limit(6)
|
||||
# @bugs = @open_source_project.relative_bugs.limit(6)
|
||||
|
||||
###########推荐#############################################################
|
||||
@simility_osp_array ,@simility_id_array,@simility_weight_array ,@simility_osp_des= recommend(params[:id],"simility")
|
||||
|
@ -205,6 +205,32 @@ class OpenSourceProjectsController < ApplicationController
|
|||
language = ''
|
||||
language << @language
|
||||
#search_words << ' ' + @language if @language
|
||||
|
||||
# 请求starlee服务
|
||||
require 'net/http'
|
||||
|
||||
star_projects = []
|
||||
parse_string = "http://172.16.128.35:5000/correlation_search?q=" + @name
|
||||
|
||||
# parse_string = "http://172.20.10.3:5000/correlation_search?q=" + @name
|
||||
parse_string.gsub! " ","%20"
|
||||
url = URI.parse(parse_string)
|
||||
req = Net::HTTP::Get.new(url.to_s)
|
||||
res = Net::HTTP.start(url.host, url.port) {|http|
|
||||
http.request(req)
|
||||
}
|
||||
star_items = res.body
|
||||
require 'json'
|
||||
star_items = JSON.parse(star_items)["items"]
|
||||
star_project_ids = star_items[0,5]
|
||||
star_projects = []
|
||||
# 查询出id对应的项目
|
||||
star_project_ids.each do |id|
|
||||
id = id.to_i
|
||||
project = OpenSourceProject.find_by_id(id)
|
||||
star_projects << project
|
||||
end
|
||||
|
||||
search = OpenSourceProject.search do
|
||||
without(:filtration,0)
|
||||
with(:created_time, Date.new(params[:created_time].to_i, 01, 01)..Date.new(params[:created_time].to_i+1, 01, 01)) if params[:created_time].present? && !('earlier'.eql? params[:created_time])
|
||||
|
@ -240,12 +266,36 @@ class OpenSourceProjectsController < ApplicationController
|
|||
else
|
||||
order_by(:score, :desc)
|
||||
end
|
||||
paginate :page => params[:page], :per_page => 10 #the default paginator of solr,10 records for each page
|
||||
paginate :page => params[:page], :per_page => 10 # the default paginator of solr,10 records for each page
|
||||
end
|
||||
per_page_option = 10
|
||||
@hits = search.hits
|
||||
@open_source_projects = search.results
|
||||
@projects_count = search.total #get total count of search
|
||||
|
||||
open_source_projects_index = 0
|
||||
while open_source_projects_index<@open_source_projects.length do
|
||||
project = @open_source_projects[open_source_projects_index]
|
||||
id = project.id
|
||||
star_projects.each do |star_project|
|
||||
if id.to_i == star_project.id
|
||||
@open_source_projects.delete(project)
|
||||
open_source_projects_index -= 1
|
||||
break
|
||||
end
|
||||
end
|
||||
open_source_projects_index += 1
|
||||
end
|
||||
|
||||
if params[:page]==nil || params[:page]==1
|
||||
star_index = star_projects.length-1
|
||||
while star_index>=0 do
|
||||
star_project = star_projects[star_index]
|
||||
@open_source_projects.unshift(star_project)
|
||||
star_index -= 1
|
||||
end
|
||||
end
|
||||
|
||||
# if @projects_count <= 10
|
||||
# search_words1 = search_words.unpack("U*").select{ |p| (0x4e00..0x9fa5).member? p }.pack("U*")
|
||||
# if search_words.include?('客户端')
|
||||
|
@ -271,6 +321,8 @@ class OpenSourceProjectsController < ApplicationController
|
|||
# @projects_count = search.total + search_1.total
|
||||
# end
|
||||
@projects_pages = Paginator.new @projects_count, per_page_option, params['page'] #custom paginator,to show the 10 records the default paginator of solr gets
|
||||
|
||||
|
||||
respond_to do |format|
|
||||
format.html { render :layout => "base" }
|
||||
end
|
||||
|
@ -408,7 +460,8 @@ class OpenSourceProjectsController < ApplicationController
|
|||
@created_time = params[:created_time]
|
||||
|
||||
@projects_count=OpenSourceProject.count
|
||||
@posts_count = RelativeMemo.count
|
||||
# @posts_count = RelativeMemo.count
|
||||
@posts_count = 14243800
|
||||
#@open_source_projects=OpenSourceProject.find_hot_osps(20)
|
||||
@projects_for_cloud = OpenSourceProject.where("filtration != 0").order("composite_score desc").limit(60)
|
||||
|
||||
|
|
|
@ -1689,11 +1689,11 @@ module ApplicationHelper
|
|||
# nav_list.push(stores_link) if @nav_dispaly_store_all_label
|
||||
|
||||
#osp_index
|
||||
nav_list.push(osp_index_link) if @nav_dispaly_osp_index_label
|
||||
# nav_list.push(osp_index_link) if @nav_dispaly_osp_index_label
|
||||
#highchart_index
|
||||
nav_list.push(highchart_index_link) if @nav_dispaly_osp_index_label
|
||||
# nav_list.push(highchart_index_link) if @nav_dispaly_osp_index_label
|
||||
#sta_git_match
|
||||
nav_list.push(sta_git_match_link) if @nav_dispaly_osp_index_label
|
||||
# nav_list.push(sta_git_match_link) if @nav_dispaly_osp_index_label
|
||||
|
||||
content_li = ''
|
||||
nav_list.collect do |nav_item|
|
||||
|
|
|
@ -246,12 +246,12 @@ class OpenSourceProject < ActiveRecord::Base
|
|||
#tag_ids = ActsAsTaggableOn::Tagging.select(:tag_id).where("taggable_id = ? AND taggable_type = ? AND disagree_num > ?", obj_id, "OpenSourceProject", 5)
|
||||
# ActsAsTaggableOn::Tag.includes(ActsAsTaggableOn::Tagging).where("taggable_id = ? AND taggable_type = ? AND disagree_num > ?", obj_id, "OpenSourceProject", -1)
|
||||
#tag_list = ActsAsTaggableOn::Tag.select(:name).where(id:tag_ids)
|
||||
tag_list = ActsAsTaggableOn::Tag.find_by_sql ["SELECT `tags`.* FROM `tags`"+
|
||||
" INNER JOIN `taggings` ON `tags`.`id` = `taggings`.`tag_id` WHERE"+
|
||||
"`taggings`.`taggable_id` = #{self.id}"+
|
||||
" AND `taggings`.`taggable_type` = 'OpenSourceProject' AND `taggings`.`disagree_num` > 5" +
|
||||
" AND (taggings.context = 'tags' AND taggings.tagger_id IS NULL) LIMIT #{limit}"]
|
||||
return tag_list
|
||||
# tag_list = ActsAsTaggableOn::Tag.find_by_sql ["SELECT `tags`.* FROM `tags`"+
|
||||
# " INNER JOIN `taggings` ON `tags`.`id` = `taggings`.`tag_id` WHERE"+
|
||||
# "`taggings`.`taggable_id` = #{self.id}"+
|
||||
# " AND `taggings`.`taggable_type` = 'OpenSourceProject' AND `taggings`.`disagree_num` > 5" +
|
||||
# " AND (taggings.context = 'tags' AND taggings.tagger_id IS NULL) LIMIT #{limit}"]
|
||||
# return tag_list
|
||||
end
|
||||
|
||||
def get_tag_list_userTag(limit = 9)
|
||||
|
|
|
@ -18,7 +18,7 @@
|
|||
"http://www.miibeian.gov.cn/", :target => "_blank" %></span>
|
||||
|
||||
</p>
|
||||
<div id="logo_link">
|
||||
<!-- <div id="logo_link">
|
||||
<span class="footer_logo_link"><%= link_to image_tag('/images/footer_logo/nudt.png',
|
||||
:size=>'100x30',:alt=>"国防科学技术大学计算机学院"),
|
||||
"http://www.nudt.edu.cn/special.asp?classid=12", :target => "_blank" %></span>
|
||||
|
@ -34,7 +34,7 @@
|
|||
<span class="footer_logo_link"><%= link_to image_tag('/images/footer_logo/inforbus.png',
|
||||
:size=>'100x30',:alt=>"山东中创软件商用中间件股份有限公司"),
|
||||
"http://www.inforbus.com", :target => "_blank" %></span>
|
||||
</div>
|
||||
</div> -->
|
||||
<!--gcm-->
|
||||
|
||||
</div>
|
||||
|
|
|
@ -75,7 +75,7 @@
|
|||
</div>
|
||||
<% end -%>
|
||||
<%#= render_menu :top_menu if User.current.logged? || !Setting.login_required? -%>
|
||||
<%= render_dynamic_nav if User.current.logged? || !Setting.login_required? -%>
|
||||
<%#= render_dynamic_nav if User.current.logged? || !Setting.login_required? -%>
|
||||
<%# 自建导航条在base页面中以 (@nav_dispaly......) 开头变量设定, 全局搜索即可发现 %>
|
||||
</div>
|
||||
<div style="border-top:solid 1px #C6E9F1;width:940px;margin-left:auto;margin-right:auto;margin-bottom: 0px;margin-top: -10px;"></div>
|
||||
|
|
|
@ -52,7 +52,7 @@
|
|||
<div style="width: 940px; margin: auto;">
|
||||
<div>
|
||||
<div class="welcome_logo">
|
||||
<%= link_to image_tag("/images/logo.png", weight: "36px", height: "36px"), home_path %>
|
||||
<!-- <%= link_to image_tag("/images/logo.png", weight: "36px", height: "36px"), home_path %> -->
|
||||
</div>
|
||||
<% if User.current.logged? -%>
|
||||
<div id="loggedas">
|
||||
|
@ -75,7 +75,7 @@
|
|||
<% end -%>
|
||||
|
||||
<%#= render_menu :top_menu if User.current.logged? || !Setting.login_required? -%>
|
||||
<%= render_dynamic_nav if User.current.logged? || !Setting.login_required? -%>
|
||||
<%#= render_dynamic_nav if User.current.logged? || !Setting.login_required? -%>
|
||||
<%# 自建导航条在base页面中以 (@nav_dispaly......) 开头变量设定, 全局搜索即可发现 %>
|
||||
</div>
|
||||
<div style="clear: both;"></div>
|
||||
|
|
|
@ -24,9 +24,9 @@
|
|||
<p class="stats">
|
||||
<!--%= link_to project_info.followers_num, open_source_project_path(project) %-->
|
||||
<a>
|
||||
<%= (project.relative_memos_num.nil?)?0:project.relative_memos_num.to_i %>
|
||||
<!-- <%= (project.relative_memos_num.nil?)?0:project.relative_memos_num.to_i %> -->
|
||||
</a>
|
||||
<%= content_tag('span', l(:label_x_relative_topics_count, :count => 0)) %>
|
||||
<!-- <%= content_tag('span', l(:label_x_relative_topics_count, :count => 0)) %> -->
|
||||
</p>
|
||||
|
||||
|
||||
|
@ -35,10 +35,10 @@
|
|||
<p class="stats">
|
||||
<!--%= link_to project_info.visit_num, open_source_project_path(project) %-->
|
||||
<a>
|
||||
<%= short_num (project.view_num_ossean.nil?)?0:project.view_num_ossean %>
|
||||
<!-- <%= short_num (project.view_num_ossean.nil?)?0:project.view_num_ossean %> -->
|
||||
<%#= short_num ((project.view_num.nil?)?0:project.view_num)+project.view_num_ossean+project.relative_topics.sum('view_num') %>
|
||||
</a>
|
||||
<%= content_tag('span', l(:label_view_num)) %>
|
||||
<%#= content_tag('span', l(:label_view_num)) %>
|
||||
</p>
|
||||
<!--<p class="stats">-->
|
||||
<!--%=link_to project.code_line %-->
|
||||
|
|
|
@ -17,16 +17,16 @@
|
|||
</div>
|
||||
<div class="wiki-description">
|
||||
<%#= textilizable(project.short_description, :project => project) %>
|
||||
<%= show_project_descriptions project.result.description, 300 %>
|
||||
<%= show_project_descriptions project.description, 300 %>
|
||||
</div>
|
||||
|
||||
<div class="information">
|
||||
<p class="stats">
|
||||
<!--%= link_to project_info.followers_num, open_source_project_path(project) %-->
|
||||
<%#= link_to project_info.followers_num, open_source_project_path(project) %>
|
||||
<a>
|
||||
<%= (project.result.relative_memos_num.nil?)?0:project.result.relative_memos_num.to_i %>
|
||||
<%#= (project.result.relative_memos_num.nil?)?0:project.result.relative_memos_num.to_i %>
|
||||
</a>
|
||||
<%= content_tag('span', l(:label_x_relative_topics_count, :count => 0)) %>
|
||||
<%#= content_tag('span', l(:label_x_relative_topics_count, :count => 0)) %>
|
||||
</p>
|
||||
|
||||
<!--
|
||||
|
@ -42,10 +42,10 @@
|
|||
<p class="stats">
|
||||
<!--%= link_to project_info.visit_num, open_source_project_path(project) %-->
|
||||
<a>
|
||||
<%= short_num((project.result.view_num_ossean.nil?)?0:project.result.view_num_ossean)%>
|
||||
<%#= short_num((project.result.view_num_ossean.nil?)?0:project.result.view_num_ossean)%>
|
||||
<%#= short_num ((project.view_num.nil?)?0:project.view_num)+project.view_num_ossean+project.relative_topics.sum('view_num') %>
|
||||
</a>
|
||||
<%= content_tag('span', l(:label_view_num)) %>
|
||||
<%#= content_tag('span', l(:label_view_num)) %>
|
||||
</p>
|
||||
|
||||
<!-- <p class="stats">
|
||||
|
@ -78,7 +78,7 @@
|
|||
<!--</div>-->
|
||||
<div class="licences">
|
||||
<%= content_tag('span', "#{l(:label_create_time)}: ") %>
|
||||
<%= content_tag('span', project.result.created_time.to_s[0,10]) %>
|
||||
<%= content_tag('span', project.created_time.to_s[0,10]) %>
|
||||
</div>
|
||||
|
||||
<!--gcm-->
|
||||
|
@ -99,6 +99,6 @@
|
|||
<div class="tags">
|
||||
<div id="tags">
|
||||
<%= image_tag("/images/sidebar/tags.png") %>
|
||||
<%= render :partial => 'tags/tag_name', :locals => {:obj => project.result, :object_flag => "8", :non_list_all => true} %>
|
||||
<%= render :partial => 'tags/tag_name', :locals => {:obj => project, :object_flag => "8", :non_list_all => true} %>
|
||||
</div>
|
||||
</div>
|
||||
|
|
|
@ -86,7 +86,7 @@
|
|||
<%= text_field_tag :name, nil, placeholder:'请输入要搜索的关键字', :size => 27, :class => "topics-search-text-field" %>
|
||||
</div>
|
||||
<%#= hidden_field_tag 'project_type', project_type %>
|
||||
<%= submit_tag l(:label_search_topics), :class => "enterprise", :name => nil, :onclick => "user_trace_search_knowledge_in_search_bar('#{request.session_options[:id]}');" %>
|
||||
<%#= submit_tag l(:label_search_topics), :class => "enterprise", :name => nil, :onclick => "user_trace_search_knowledge_in_search_bar('#{request.session_options[:id]}');" %>
|
||||
</div>
|
||||
<% end %>
|
||||
<%###################search topics###########################%>
|
||||
|
|
|
@ -207,15 +207,33 @@
|
|||
<div data-group="nav-topbar">
|
||||
<div class="nav-topbar-content">
|
||||
<ul>
|
||||
<li class="nav-search-con">查找条件><span class="nav-topbar-arror"></span>
|
||||
<!-- <li class="nav-search-con">查找条件><span class="nav-topbar-arror"></span>
|
||||
</li>
|
||||
<li>
|
||||
<span class="topbar-search J_TopbarSearch">
|
||||
|
||||
<%= show_condition(@app_dir, @language, @created_time, params[:name]) %>
|
||||
</span>
|
||||
</li>
|
||||
-->
|
||||
<%#= show_condition(@app_dir, @language, @created_time, params[:name]) %>
|
||||
<!-- </span>
|
||||
</li> -->
|
||||
<li class="nav-search-con">共 <span class="h"><%= @projects_count %></span> 个开源项目</li>
|
||||
|
||||
<li class="nav-search-con" style="margin-left: 50px">开发语言:
|
||||
<%= link_to "JAVA", search_open_source_projects_path(:language => "JAVA", :app_dir => @app_dir, :created_time => @created_time, :name => params[:name]), :class => "nav-more J_More show" %>
|
||||
<%= link_to "C++", search_open_source_projects_path(:language => "c++", :app_dir => @app_dir, :created_time => @created_time, :name => params[:name]), :class => "nav-more J_More show" %>
|
||||
<%= link_to "C", search_open_source_projects_path(:language => "c", :app_dir => @app_dir, :created_time => @created_time, :name => params[:name]), :class => "nav-more J_More show" %>
|
||||
<%= link_to "C#", search_open_source_projects_path(:language => "c#", :app_dir => @app_dir, :created_time => @created_time, :name => params[:name]), :class => "nav-more J_More show" %>
|
||||
<%= link_to "ruby", search_open_source_projects_path(:language => "ruby", :app_dir => @app_dir, :created_time => @created_time, :name => params[:name]), :class => "nav-more J_More show" %>
|
||||
</li>
|
||||
|
||||
<li class="nav-search-con" style="margin-left: 50px">创立时间:
|
||||
<%= link_to "2014", search_open_source_projects_path(:app_dir => @app_dir, :language => @language, :created_time => "2014", :name => params[:name]), :class => "nav-more J_More show" %>
|
||||
<%= link_to "2013", search_open_source_projects_path(:app_dir => @app_dir, :language => @language, :created_time => "2013", :name => params[:name]), :class => "nav-more J_More show" %>
|
||||
<%= link_to "2012", search_open_source_projects_path(:app_dir => @app_dir, :language => @language, :created_time => "2012", :name => params[:name]), :class => "nav-more J_More show" %>
|
||||
<%= link_to "2011", search_open_source_projects_path(:app_dir => @app_dir, :language => @language, :created_time => "2011", :name => params[:name]), :class => "nav-more J_More show" %>
|
||||
<%= link_to "2010", search_open_source_projects_path(:app_dir => @app_dir, :language => @language, :created_time => "2010", :name => params[:name]), :class => "nav-more J_More show" %>
|
||||
<%= link_to "更早", search_open_source_projects_path(:app_dir => @app_dir, :language => @language, :created_time => "earlier", :name => params[:name]), :class => "nav-more J_More show" %>
|
||||
|
||||
</li>
|
||||
<!-- <li class="related-count"><a href="/search?q=iphone&app=vproduct&vlist=1&from_combo=true">8款相关产品</a></li> -->
|
||||
</ul>
|
||||
</div>
|
||||
|
@ -237,7 +255,7 @@
|
|||
<span class="arrow arrow-bottom"></span>
|
||||
</a>
|
||||
</div> -->
|
||||
<div class="nav-category ">
|
||||
<!-- <div class="nav-category ">
|
||||
<h4 title="网络类型">开发语言<span>:</span></h4>
|
||||
<%= link_to "JAVA", search_open_source_projects_path(:language => "JAVA", :app_dir => @app_dir, :created_time => @created_time, :name => params[:name]), :class => "nav-more J_More show" %>
|
||||
<%= link_to "C++", search_open_source_projects_path(:language => "c++", :app_dir => @app_dir, :created_time => @created_time, :name => params[:name]), :class => "nav-more J_More show" %>
|
||||
|
@ -248,8 +266,8 @@
|
|||
|
||||
<span class="arrow arrow-bottom"></span>
|
||||
</a>
|
||||
</div>
|
||||
<div class="nav-category ">
|
||||
</div> -->
|
||||
<!-- <div class="nav-category ">
|
||||
<h4 title="网络类型">创立时间<span>:</span></h4>
|
||||
<%= link_to "2014", search_open_source_projects_path(:app_dir => @app_dir, :language => @language, :created_time => "2014", :name => params[:name]), :class => "nav-more J_More show" %>
|
||||
<%= link_to "2013", search_open_source_projects_path(:app_dir => @app_dir, :language => @language, :created_time => "2013", :name => params[:name]), :class => "nav-more J_More show" %>
|
||||
|
@ -260,7 +278,8 @@
|
|||
|
||||
<span class="arrow arrow-bottom"></span>
|
||||
</a>
|
||||
</div>
|
||||
</div> -->
|
||||
<p></p>
|
||||
</div>
|
||||
<div class="nav-category-group type-block" data-group="type-block">
|
||||
<div class="nav-flexbox ">
|
||||
|
@ -285,12 +304,12 @@
|
|||
|
||||
<div id="projects-index">
|
||||
<ul class='projects'>
|
||||
<% for i in (0..@hits.length-1) %>
|
||||
<% project = @hits[i] %>
|
||||
<% for i in (0..@open_source_projects.length-1) %>
|
||||
<% project = @open_source_projects[i] %>
|
||||
<% if !project.nil? %>
|
||||
<li class='project-table'>
|
||||
<div class='root'>
|
||||
<%= link_to project.result.name, open_source_project_path(project.result), :class => "project root leaf", :onclick => "user_trace_click_project_in_project_list_after_search(this,#{i+1},'#{request.session_options[:id]}');" %>
|
||||
<%= link_to project.name, open_source_project_path(project), :class => "project root leaf" %>
|
||||
<span style="float: right;"></span>
|
||||
<%= render :partial => 'open_source_projects/os_project_search_list', :locals => {:project => project, :i => i} %>
|
||||
</div>
|
||||
|
|
|
@ -96,14 +96,14 @@
|
|||
</ul>
|
||||
</div>
|
||||
|
||||
<div id="relation_recommendation" style="margin-top: 20px;">
|
||||
<!-- <div id="relation_recommendation" style="margin-top: 20px;">
|
||||
<div class="recommondation_title">软件关联网络</div>
|
||||
|
||||
<div id="echarts_simility" style="width: 220px; height: 200px;">
|
||||
</div>
|
||||
<div id="echarts_relation" style="width: 220px; height: 200px;">
|
||||
</div>
|
||||
</div>
|
||||
</div> -->
|
||||
</div>
|
||||
|
||||
|
||||
|
|
|
@ -1,21 +1,21 @@
|
|||
<!-- 全态势分析tab页 -->
|
||||
<div class="tab-pane fade in active" id="tab2">
|
||||
<h1 style="fontsize:19px;color:#E67E22;font-weight:900"><span style="color: #269AC9;font-weight:900">职位需求情况</span></h1>
|
||||
<!-- <h1 style="fontsize:19px;color:#E67E22;font-weight:900"><span style="color: #269AC9;font-weight:900">职位需求情况</span></h1>
|
||||
|
||||
<div id="highchart_container1" style="width: 330px; height: 200px; float: left"></div>
|
||||
<div id="highchart_container3" style="width: 330px; height: 200px; float: right"></div>
|
||||
<div id="highchart_container3" style="width: 330px; height: 200px; float: right"></div> -->
|
||||
|
||||
<h1 style="fontsize:19px;color: #E67E22;font-weight:900; clear: both"><span style="color: #269AC9;font-weight:900">薪资情况</span></h1>
|
||||
<h1 style="fontsize:19px;color: #E67E22;font-weight:900; clear: both"><span style="color: #269AC9;font-weight:900">工作地点分析</span></h1>
|
||||
|
||||
<div id="highchart_container2" style="width: 330px; height: 200px; float: left"></div>
|
||||
<div id="highchart_container3" style="width: 330px; height: 200px; float: left"></div>
|
||||
<div id="highchart_container4" style="width: 330px; height: 200px; float: right"></div>
|
||||
|
||||
<h1 style="fontsize:19px;color: #E67E22;font-weight:900; clear: both"><span style="color: #269AC9;font-weight:900">工作经验情况</span></h1>
|
||||
<h1 style="fontsize:19px;color: #E67E22;font-weight:900; clear: both"><span style="color: #269AC9;font-weight:900">工作经验分析</span></h1>
|
||||
|
||||
<div id="highchart_container5" style="width: 330px; height: 200px; float: left"></div>
|
||||
<div id="highchart_container6" style="width: 330px; height: 200px; float: right"></div>
|
||||
|
||||
<h1 style="fontsize:19px;color: #E67E22;font-weight:900; clear: both"><span style="color: #269AC9;font-weight:900">相关软件情况</span></h1>
|
||||
<h1 style="fontsize:19px;color: #E67E22;font-weight:900; clear: both"><span style="color: #269AC9;font-weight:900">相关软件分析</span></h1>
|
||||
|
||||
<div id="highchart_container7" style="width: 330px; height: 300px; float: left"></div>
|
||||
<div id="highchart_container8" style="width: 330px; height: 300px; float: right"></div>
|
||||
|
@ -385,7 +385,12 @@
|
|||
text: null
|
||||
},
|
||||
xAxis: {
|
||||
categories: categories
|
||||
categories: categories,
|
||||
labels: {
|
||||
formatter: function() {
|
||||
return(this.value.length>20?this.value.substring(0,20) + "...":this.value);
|
||||
}
|
||||
}
|
||||
},
|
||||
yAxis: {
|
||||
title: {
|
||||
|
@ -465,6 +470,7 @@
|
|||
'<%=osp.name.to_s %>',
|
||||
<% end %>
|
||||
],
|
||||
|
||||
data = [
|
||||
<% for i in 0..@chart8_osp.length-1 %>
|
||||
{
|
||||
|
@ -507,7 +513,12 @@
|
|||
text: null
|
||||
},
|
||||
xAxis: {
|
||||
categories: categories
|
||||
categories: categories,
|
||||
labels: {
|
||||
formatter: function() {
|
||||
return(this.value.length>20?this.value.substring(0,20) + "...":this.value);
|
||||
}
|
||||
}
|
||||
},
|
||||
yAxis: {
|
||||
title: {
|
||||
|
|
|
@ -282,7 +282,7 @@
|
|||
|
||||
<%###########################访问量统计代码##############################%>
|
||||
<script type="text/javascript">
|
||||
// CNZZ visit-statistics loader, intentionally disabled.
// The statements are kept as JavaScript line comments: a leading '#' is not
// comment syntax inside a <script> element and makes the browser reject the
// whole script with a SyntaxError.
// var cnzz_protocol = (("https:" == document.location.protocol) ? " https://" : " http://");
// document.write(unescape("%3Cspan id='cnzz_stat_icon_1256970726'%3E%3C/span%3E%3Cscript src='" + cnzz_protocol + "s4.cnzz.com/z_stat.php%3Fid%3D1256970726' type='text/javascript'%3E%3C/script%3E"));
|
||||
</script>
|
||||
|
||||
|
|
|
@ -266,9 +266,9 @@
|
|||
<p class="stats">
|
||||
<!--%= link_to project_info.visit_num, open_source_project_path(project) %-->
|
||||
<a>
|
||||
<%= topic.view_num+topic.view_num_ossean %>
|
||||
<!-- <%= topic.view_num+topic.view_num_ossean %> -->
|
||||
</a>
|
||||
<%= content_tag('span', l(:label_view_num)) %>
|
||||
<!-- <%= content_tag('span', l(:label_view_num)) %> -->
|
||||
</p>
|
||||
<!--<p class="stats">-->
|
||||
<!--%=link_to project.code_line %-->
|
||||
|
|
|
@ -7,18 +7,21 @@
|
|||
// });
|
||||
// })
|
||||
function reload_tags(){
|
||||
<% @tags = obj.reload.tag_list %>
|
||||
<% #@tags = obj.reload.tag_list %>
|
||||
<% @tags = obj.attributes["tags"]
|
||||
if @tags != nil && @tags.length>2
|
||||
@tags = @tags[1,@tags.length-2].split(">,<")
|
||||
end %>
|
||||
}
|
||||
</script>
|
||||
<!-- 1代表是user类型 2代表是project类型 3代表是issue类型 4代表需求-->
|
||||
<% if object_flag == '8'%>
|
||||
<%# @tags = obj.get_tag_list%>
|
||||
<% @tags = obj.tags.limit(9)%>
|
||||
<% elsif object_flag == '10' %>
|
||||
<% @tags = obj.AllTags.limit(9) %>
|
||||
<% else %>
|
||||
<% @tags = obj.reload.tag_list %>
|
||||
<% end%>
|
||||
<% #@tags = obj.get_tag_list%>
|
||||
<% @tags = obj.attributes["tags"]
|
||||
if @tags != nil && @tags.length>2
|
||||
@tags = @tags[1,@tags.length-2].split(">,<")
|
||||
end
|
||||
|
||||
@tags = @tags[0,9]%>
|
||||
|
||||
|
||||
<% if non_list_all and (@tags.size > 0) %>
|
||||
|
|
|
@ -1,53 +0,0 @@
|
|||
source ~/.rvm/scripts/rvm# Default setup is given for MySQL with ruby1.9. If you're running Redmine
|
||||
source ~/.rvm/scripts/rvm# Default setup is given for MySQL with ruby1.9. If you're running Redmine
|
||||
# with MySQL and ruby1.8, replace the adapter name with `mysql`.
|
||||
# Examples for PostgreSQL, SQLite3 and SQL Server can be found at the end.
|
||||
# Line indentation must be 2 spaces (no tabs).
|
||||
|
||||
production:
|
||||
adapter: mysql2
|
||||
database: redmine
|
||||
host: localhost
|
||||
username: root
|
||||
password: ""
|
||||
encoding: utf8
|
||||
|
||||
development:
|
||||
adapter: mysql2
|
||||
database: redmine_development
|
||||
host: 10.107.17.20
|
||||
username: root
|
||||
password: "1234"
|
||||
encoding: utf8
|
||||
|
||||
# Warning: The database defined as "test" will be erased and
|
||||
# re-generated from your development database when you run "rake".
|
||||
# Do not set this db to the same as development or production.
|
||||
test:
|
||||
adapter: mysql2
|
||||
database: redmine_test
|
||||
host: 10.107.17.20
|
||||
username: root
|
||||
password: "1234"
|
||||
encoding: utf8
|
||||
|
||||
# PostgreSQL configuration example
|
||||
#production:
|
||||
# adapter: postgresql
|
||||
# database: redmine
|
||||
# host: localhost
|
||||
# username: postgres
|
||||
# password: "postgres"
|
||||
|
||||
# SQLite3 configuration example
|
||||
#production:
|
||||
# adapter: sqlite3
|
||||
# database: db/redmine.sqlite3
|
||||
|
||||
# SQL Server configuration example
|
||||
#production:
|
||||
# adapter: sqlserver
|
||||
# database: redmine
|
||||
# host: localhost
|
||||
# username: jenkins
|
||||
# password: jenkins
|
|
@ -0,0 +1,7 @@
|
|||
# Puma server settings for the production deployment.
environment "production"
|
||||
# Listen on a Unix domain socket instead of a TCP port.
bind "unix:///var/run/ossean.sock"
|
||||
# Script used to restart the server (presumably invoked by Puma's restart
# handling -- confirm against the deployed Puma version).
restart_command '/var/www/ossean/trustie2/restart_puma'
|
||||
# Run detached in the background.
daemonize true
|
||||
# File that receives the server's process id.
pidfile "/var/tmp/pids/puma.pid"
|
||||
# Redirection of stdout/stderr to log files is currently switched off.
#stdout_redirect "/var/www/ossean/trustie2/log/stdout", "/var/www/ossean/trustie2/log/stderr"
|
||||
|
|
@ -0,0 +1,10 @@
|
|||
# Adds lookup indexes to the numbered
# relative_memo_to_open_source_projects_<n> tables (n = 1..70).
class AddIndexToRelativeMemoToOpenSourceProjectsTables < ActiveRecord::Migration
  # For each table suffix from 1 to 70, indexes the osp_id, memo_type and
  # created_time columns. Every index is named "<column>_<suffix>" so the
  # names stay distinct across tables.
  def change
    1.upto(70) do |suffix|
      table = "relative_memo_to_open_source_projects_#{suffix}".to_sym
      [:osp_id, :memo_type, :created_time].each do |column|
        add_index table, column, :name => "#{column}_#{suffix}"
      end
    end
  end
end
|
Binary file not shown.
After Width: | Height: | Size: 2.6 KiB |
Loading…
Reference in New Issue