diff --git a/.gitignore b/.gitignore
index 2d61090..a2ee364 100644
--- a/.gitignore
+++ b/.gitignore
@@ -110,8 +110,10 @@
/gather_program/.settings/
/gather_program/.classpath
-#/crawler/dailyScheduledCrawler/fetch_networks/target/
-#/crawler/dailyScheduledCrawler/fetch_networks/log/*
-#/crawler/daily_scheduler/log/*
-#/crawler/moreSmarterCrawler/fetch_networks/target/
-#/crawler/moreSmarterCrawler/fetch_networks/log/*
+
+/crawler/dailyScheduledCrawler/fetch_networks/target/
+/crawler/dailyScheduledCrawler/fetch_networks/log/*
+/crawler/daily_scheduler/log/*
+/crawler/daily_scheduler/log_mem/*
+/crawler/moreSmarterCrawler/fetch_networks/target/
+/crawler/moreSmarterCrawler/fetch_networks/log/*
diff --git a/gather_program/bin/gather_projects.sh b/gather_program/bin/gather_projects.sh
index e8be36a..79ceee0 100644
--- a/gather_program/bin/gather_projects.sh
+++ b/gather_program/bin/gather_projects.sh
@@ -16,4 +16,4 @@ JVM_ARGS="-Xmn98m -Xmx512m -Xms512m -XX:NewRatio=4 -XX:SurvivorRatio=4 -XX:MaxTe
#echo JVM_ARGS=$JVM_ARGS
#ulimit -n 400000
#echo "" > nohup.out
-java $JVM_ARGS -classpath $CLASSPATH org.ossean.gather.process.GatherProcess gather_projects >>log/gather_projects.log 2>&1 &
\ No newline at end of file
+java $JVM_ARGS -classpath $CLASSPATH org.ossean.gather.process.GatherProcess gather_projects > log/gather_projects.log 2>&1 &
\ No newline at end of file
diff --git a/gather_program/bin/job_requirements.sh b/gather_program/bin/job_requirements.sh
index 8abfc74..d81149e 100644
--- a/gather_program/bin/job_requirements.sh
+++ b/gather_program/bin/job_requirements.sh
@@ -17,4 +17,4 @@ JVM_ARGS="-Xmn98m -Xmx512m -Xms512m -XX:NewRatio=4 -XX:SurvivorRatio=4 -XX:MaxTe
#echo JVM_ARGS=$JVM_ARGS
#ulimit -n 400000
#echo "" > nohup.out
-java $JVM_ARGS -classpath $CLASSPATH org.ossean.gather.process.GatherProcess job_requirements >>log/job_requirements.log 2>&1 &
\ No newline at end of file
+java $JVM_ARGS -classpath $CLASSPATH org.ossean.gather.process.GatherProcess job_requirements > log/job_requirements.log 2>&1 &
\ No newline at end of file
diff --git a/gather_program/bin/relative_memos.sh b/gather_program/bin/relative_memos.sh
index 45d96e6..fbb4326 100644
--- a/gather_program/bin/relative_memos.sh
+++ b/gather_program/bin/relative_memos.sh
@@ -17,4 +17,4 @@ JVM_ARGS="-Xmn98m -Xmx2048m -Xms1024m -XX:NewRatio=4 -XX:SurvivorRatio=4 -XX:Max
#echo JVM_ARGS=$JVM_ARGS
#ulimit -n 400000
#echo "" > nohup.out
-java $JVM_ARGS -classpath $CLASSPATH org.ossean.gather.process.GatherProcess relative_memos >>log/relative_memos.log 2>&1 &
\ No newline at end of file
+java $JVM_ARGS -classpath $CLASSPATH org.ossean.gather.process.GatherProcess relative_memos > log/relative_memos.log 2>&1 &
\ No newline at end of file
diff --git a/gather_program/bin/resources/applicationContext-myBatis.xml b/gather_program/bin/resources/applicationContext-myBatis.xml
index 69361cf..96b11e1 100644
--- a/gather_program/bin/resources/applicationContext-myBatis.xml
+++ b/gather_program/bin/resources/applicationContext-myBatis.xml
@@ -39,9 +39,9 @@
destroy-method="close">
+ value="jdbc:mysql://172.16.128.36:3306/ossean_production?characterEncoding=UTF-8&zeroDateTimeBehavior=convertToNull&autoReconnect=true" />
-
+
diff --git a/gather_program/bin/resources/relative_memos.xml b/gather_program/bin/resources/relative_memos.xml
index c0aa136..ea2f60b 100644
--- a/gather_program/bin/resources/relative_memos.xml
+++ b/gather_program/bin/resources/relative_memos.xml
@@ -3,7 +3,7 @@
TableFlow
pointers
- oschina_question,slashdot,iteye_blog,51cto_blog,csdn_question,cnblog_news,cnblog_question,csdn_bbs,csdn_blogs,dewen_question,stackoverflow
+ oschina_question,iteye_blog,51cto_blog,csdn_question,cnblog_news,cnblog_question,csdn_bbs,csdn_blogs,dewen_question,stackoverflow
relative_memos
id,title,content,created_time,now(),type,tags,source,url,url_md5,author,author_url,view_num,review_num,extracted_time
id,title,content,created_time,updated_time,memo_type,tags,source,url,url_md5,author,author_url,view_num,review_num,extracted_time
diff --git a/gather_program/sql/settings.sql b/gather_program/sql/settings.sql
new file mode 100644
index 0000000..a77a396
--- /dev/null
+++ b/gather_program/sql/settings.sql
@@ -0,0 +1,29 @@
+/*
+Navicat MySQL Data Transfer
+
+Source Server : ossean
+Source Server Version : 50535
+Source Host : 127.0.0.1:3306
+Source Database : ossean_new
+
+Target Server Type : MYSQL
+Target Server Version : 50535
+File Encoding : 65001
+
+Date: 2016-11-15 20:04:15
+*/
+
+SET FOREIGN_KEY_CHECKS=0;
+
+-- ----------------------------
+-- Table structure for settings: name/value rows; the DROP below discards any existing table first
+-- ----------------------------
+DROP TABLE IF EXISTS `settings`;
+CREATE TABLE `settings` (
+ `id` int(11) NOT NULL AUTO_INCREMENT,
+ `name` varchar(255) NOT NULL DEFAULT '',
+ `value` text,
+ `updated_on` datetime DEFAULT NULL,
+ PRIMARY KEY (`id`),
+ KEY `index_settings_on_name` (`name`) USING BTREE
+) ENGINE=InnoDB DEFAULT CHARSET=utf8;
diff --git a/gather_program/sql/taggings.sql b/gather_program/sql/taggings.sql
new file mode 100644
index 0000000..9087bdb
--- /dev/null
+++ b/gather_program/sql/taggings.sql
@@ -0,0 +1,36 @@
+/*
+Navicat MySQL Data Transfer
+
+Source Server : ossean
+Source Server Version : 50535
+Source Host : 127.0.0.1:3306
+Source Database : ossean_production
+
+Target Server Type : MYSQL
+Target Server Version : 50535
+File Encoding : 65001
+
+Date: 2016-11-13 22:21:05
+*/
+
+SET FOREIGN_KEY_CHECKS=0;
+
+-- ----------------------------
+-- Table structure for taggings: one row per (tag_id, taggable_id, taggable_type), enforced by the unique key; the DROP below discards any existing table first
+-- ----------------------------
+DROP TABLE IF EXISTS `taggings`;
+CREATE TABLE `taggings` (
+ `id` int(11) NOT NULL AUTO_INCREMENT,
+ `tag_id` int(11) NOT NULL,
+ `taggable_id` int(11) NOT NULL,
+ `taggable_type` varchar(255) NOT NULL,
+ `tagger_id` int(11) DEFAULT NULL,
+ `tagger_type` varchar(255) DEFAULT NULL,
+ `context` varchar(128) DEFAULT NULL,
+ `created_at` datetime DEFAULT NULL,
+ `created_time` datetime DEFAULT NULL,
+ `disagree_num` int(11) DEFAULT '0',
+ `tag_source` varchar(255) DEFAULT NULL,
+ PRIMARY KEY (`id`),
+ UNIQUE KEY `index_taggings_on_tag_id_and_taggable_id_and_taggable_type` (`tag_id`,`taggable_id`,`taggable_type`) USING BTREE
+) ENGINE=InnoDB AUTO_INCREMENT=19315557 DEFAULT CHARSET=utf8;
diff --git a/gather_program/src/main/java/org/ossean/gather/process/GatherThread.java b/gather_program/src/main/java/org/ossean/gather/process/GatherThread.java
index 7149774..fabb5ef 100644
--- a/gather_program/src/main/java/org/ossean/gather/process/GatherThread.java
+++ b/gather_program/src/main/java/org/ossean/gather/process/GatherThread.java
@@ -1,363 +1,363 @@
-package org.ossean.gather.process;
-
-import java.util.List;
-
-import javax.annotation.Resource;
-
-import org.apache.log4j.Logger;
-import org.ossean.gather.model.Configure;
-import org.ossean.gather.model.GatherProject;
-import org.ossean.gather.model.JobRequirement;
-import org.ossean.gather.model.PKControlPosts;
-import org.ossean.gather.model.PKControlProjects;
-import org.ossean.gather.model.RelativeMemo;
-import org.ossean.gather.model.Taggings;
-import org.ossean.gather.sourceDao.GatherDao;
-import org.ossean.gather.sourceDao.PKControlPostsDao;
-import org.ossean.gather.sourceDao.PKControlProjectsDao;
-import org.ossean.gather.targetDao.PointerDao;
-import org.ossean.gather.targetDao.TargetDao;
-import org.springframework.context.annotation.Scope;
-import org.springframework.stereotype.Component;
-
-@Component("gatherThread")
-@Scope("prototype")
-public class GatherThread implements Runnable {
- private static Logger logger = Logger.getLogger(GatherThread.class);
- private Configure conf;
-
- @Resource
- private GatherDao gatherDao;
- @Resource
- private PointerDao pointerDao;
- @Resource
- private PKControlPostsDao pkControlPostsDao;
- @Resource
- private TargetDao targetDao;
- @Resource
- private PKControlProjectsDao pkControlProjectsDao;
-
- private int idsBegin; // 转移开始Id值
- private int idsEnd; // 转移结束Id值
- private int idsIncrement;// 每次转移的Id量
-
- private int beginId;
- private int endId;
-
- private String sourceTableName;
- private String pkControlPostsTableName = "pk_control_posts";
- private String pkControlProjectsTableName = "pk_control_projects";
- private String taggingsTableName = "taggings";
- private String tagsTableName = "tags";
-
- private String gatherPostsTableName = "relative_memos";
- private int maxId;
-
- public void setParameters(Configure conf, String sourceTableName) {
- this.conf = conf;
- this.sourceTableName = sourceTableName;
- }
-
- // 读指针
- public int readPointer(String table, String source, String target) {
- int pointer = 1;
- try {
- pointer = pointerDao.getPointer(table, source, target);
- } catch (Exception e) {
- // 表示表中没有数据
- logger.info("No such pointer! Create one");
- pointerDao.insertPointer(table, source, target, 1);
- }
- return pointer;
- }
-
- @Override
- public void run() {
-// long start = System.currentTimeMillis();
- Thread.currentThread().setName(sourceTableName);
- idsIncrement = conf.getIdsIncrement();
- idsBegin = readPointer(conf.getPointerTableName(), sourceTableName,
- conf.getTargetTableName());
- idsEnd = maxId = gatherDao.getMaxId(sourceTableName);
- while (idsBegin < idsEnd) {
- beginId = idsBegin;
- endId = beginId + idsIncrement - 1; // 取数据时两边都取等号
- if (endId <= maxId) {
- handleBatchData(beginId, endId, conf);
- idsBegin = idsBegin + idsIncrement;
- } else {
- endId = maxId; // endId应小于maxId
- handleBatchData(beginId, endId, conf);
- break;
- }
- }
- GatherProcess.gatherState.put(sourceTableName, false);
-// long end = System.currentTimeMillis();
-// logger.info((end - start) / 6000);
- }
-
- public void handleBatchData(int beginId, int endId, Configure conf) {
- logger.info("BeginId#" + sourceTableName + ":" + beginId);
- // 表示任务没有完成
- int maxId = gatherDao.getMaxId(sourceTableName);
- // 防止转移超过当前最大值的Id数据
- if (beginId >= 0 && endId > 0 && maxId >= endId) {
- // 更新执行开始时间
- logger.info("begin gathering...");
- // 插入Id段数据,忽略重复值
- try {
- String[] sourceFields = conf.getSourceFields().split(",");
- String[] targetFields = conf.getTargetFields().split(",");
- String selectItems = "";
- for (int i = 0; i < sourceFields.length; i++) {
- String str_source = sourceFields[i];
- String str_target = targetFields[i];
- selectItems += str_source + " as " + str_target + ",";
- }
- selectItems = selectItems
- .substring(0, selectItems.length() - 1) + " ";
- if (conf.getTargetTableName().equals("relative_memos")) {
- List dataGet = gatherDao.getPostGatherData(
- sourceTableName, selectItems, beginId, endId,
- conf.getAndWhere());
- for (int i = 0; i < dataGet.size(); i++) {
- RelativeMemo model = dataGet.get(i);
- String urlMD5 = model.getUrl_md5();// 通过urlMD5判断是不是已经存在该帖子
- // 是否更新
- int postId = 0;
- if(GatherProcess.urlMd5Set.contains(urlMD5)){
- //urlmd5存在则更新
- RelativeMemo samePost = targetDao.findPostByUrlMD5(
- conf.getTargetTableName(), urlMD5);
- // update gather_projects表中对应的记录,在维持待更新表
- postId = samePost.getId();
- model.setId(postId);
- handleUpdateGatherPosts(samePost.getId(), model);
- }else{
- // 不存在 插入
- PKControlPosts pkControlModel = pkControlPostsDao
- .selectItemByUrlMD5(
- pkControlPostsTableName, urlMD5);// 查看有没有固定的id
- if (pkControlModel != null)
- model.setId(pkControlModel.getId());
- else {
- // 在pk_control_posts表中生成当前项目对应的id
- pkControlPostsDao.insertOneItem(
- pkControlPostsTableName, urlMD5);
- // 查看刚刚插入信息的id
- PKControlPosts controlItem = pkControlPostsDao
- .selectItemByUrlMD5(
- pkControlPostsTableName, urlMD5);
- // 用id构造model对应的固定不变的id
- model.setId(controlItem.getId());
- postId = model.getId();
- }
- handleInsertGatherPosts(model, conf);
- GatherProcess.urlMd5Set.add(urlMD5);
- }
-
- // 将tag和项目的关系存入表item_tag_relation 并分离tag
- String tags = model.getTags();
- if (tags == null) {
- // 表示该项目没有标签
- continue;
- }
- List tagList = DataHandler
- .tagsSegmentation(tags);
- for (String tag : tagList) {
- targetDao.insertTag(tagsTableName, tag);// ignore方式插入该项目的标签
- int tag_id = targetDao.selectTagIdByName(
- tagsTableName, tag);
- Taggings taggings = new Taggings();
- taggings.setTag_id(tag_id);
- taggings.setTaggable_id(postId);
- taggings.setTaggable_type("RelativeMemo");
- taggings.setContext("tags");
- taggings.setCreated_at(DataHandler.getNow());
- // 将Taggings对象存入数据库中
- try {
- targetDao.insertTaggings(taggingsTableName,
- taggings);
- } catch (Exception e) {
- // 在插入记录之前 relative_memos表中的记录已经被删除掉了
- logger.error(e);
- System.exit(0);
- }
- }
- }
- } else if (conf.getTargetTableName().equals("gather_projects")) {
- List dataGet = gatherDao.getPrjGatherData(
- sourceTableName, selectItems, beginId, endId,
- conf.getAndWhere());
- for (int i = 0; i < dataGet.size(); i++) {
- GatherProject model = dataGet.get(i);
- String urlMD5 = model.getUrl_md5();// 通过urlMD5判断是不是已经存在该项目
- // 是否更新
- int prjId = 0;
- if(GatherProcess.urlMd5Set.contains(urlMD5)){
- GatherProject samePrj = targetDao.findPrjByUrlMD5(
- conf.getTargetTableName(), urlMD5);
- // update gather_projects表中对应的记录,在维持待更新表
- prjId = samePrj.getId();
- model.setId(prjId);
- model.setUpdate_mark(1);
- handleUpdateGatherProjects(samePrj.getId(), model);
- }else{
- // 不存在 插入
- PKControlProjects pkControlProjects = pkControlProjectsDao
- .selectItemByUrlMD5(
- pkControlProjectsTableName, urlMD5);// 查看有没有固定的id
- if (pkControlProjects != null)
- model.setId(pkControlProjects.getId());
- else {
- // 在pk_control_posts表中生成当前项目对应的id
- pkControlProjectsDao.insertOneItem(
- pkControlProjectsTableName, urlMD5);
- // 查看刚刚插入信息的id
- PKControlProjects controlItem = pkControlProjectsDao
- .selectItemByUrlMD5(
- pkControlProjectsTableName,
- urlMD5);
- // 用id构造model对应的固定不变的id
- model.setId(controlItem.getId());
- prjId = model.getId();
- }
- model.setUpdate_mark(0);
- handleInsertGatherProjects(model, conf);
- GatherProcess.urlMd5Set.add(urlMD5);
- }
-
-// // 将tag和项目的关系存入表item_tag_relation 并分离tag
-// String tags = model.getTags();
-// if (tags == null) {
-// // 表示该项目没有标签
-// continue;
-// }
-// List tagList = DataHandler
-// .tagsSegmentation(tags);
-// for (String tag : tagList) {
-// targetDao.insertTag(tagsTableName, tag);// ignore方式插入该项目的标签
-// int tag_id = targetDao.selectTagIdByName(
-// tagsTableName, tag);
-// Taggings taggings = new Taggings();
-// taggings.setTag_id(tag_id);
-// taggings.setTaggable_id(prjId);
-// taggings.setTaggable_type("OpenSourceProject");
-// taggings.setContext("tags");
-// taggings.setCreated_at(DataHandler.getNow());
-// // 将Taggings对象存入数据库中
-// try {
-// targetDao.insertTaggings(taggingsTableName,
-// taggings);
-// } catch (Exception e) {
-// // 在插入记录之前 relative_memos表中的记录已经被删除掉了
-// logger.error(e);
-// System.exit(0);
-// }
-// }
- }
- } else {
- List dataGet = gatherDao.getJobGatherData(
- sourceTableName, selectItems, beginId, endId,
- conf.getAndWhere());
- for (int i = 0; i < dataGet.size(); i++) {
- JobRequirement model = dataGet.get(i);
- String urlMD5 = model.getUrl_md5();// 通过urlMD5判断是不是已经存在该帖子
- // 是否更新
- int postId = 0;
- if(GatherProcess.urlMd5Set.contains(urlMD5)){
- JobRequirement sameJob = targetDao.findJobByUrlMD5(
- conf.getTargetTableName(), urlMD5);
- // update gather_projects表中对应的记录,在维持待更新表
- postId = sameJob.getId();
- model.setId(postId);
- handleUpdateGatherJobs(sameJob.getId(), model);
- }else{
- // 不存在 插入
- PKControlPosts pkControlPosts = pkControlPostsDao
- .selectItemByUrlMD5(
- pkControlPostsTableName, urlMD5);// 查看有没有固定的id
- if (pkControlPosts != null)
- model.setId(pkControlPosts.getId());
- else {
- // 在pk_control_posts表中生成当前项目对应的id
- pkControlPostsDao.insertOneItem(
- pkControlPostsTableName, urlMD5);
- // 查看刚刚插入信息的id
- PKControlPosts controlItem = pkControlPostsDao
- .selectItemByUrlMD5(
- pkControlPostsTableName, urlMD5);
- // 用id构造model对应的固定不变的id
- model.setId(controlItem.getId());
- postId = model.getId();
- }
- handleInsertGatherJobs(model, conf);
- GatherProcess.urlMd5Set.add(urlMD5);
- }
- }
- }
-
- } catch (Exception ex) {
- // 数据迁移过程可能发生异常情况
- logger.error(ex);
- System.exit(0);
- }
-
- // 更新游标到本次 EndId+1;
- pointerDao.updatePointer(conf.getPointerTableName(),
- sourceTableName, conf.getTargetTableName(), endId + 1);// sourceIdBegin
- // +
- // idsIncrement
- logger.info("current--" + sourceTableName + ": " + endId);
- }
- }
-
- // 处理URL不存在的帖子 插入relative_memos表
- public void handleInsertGatherPosts(RelativeMemo model, Configure conf) {
- try {
- targetDao.insertRelativeMemo(conf.getTargetTableName(),
- conf.getTargetFields(), model);
- } catch (Exception e) {
- logger.error(e);
- }
- }
-
- // 处理URL相同的帖子更新 id表示更新的帖子固定id
- public void handleUpdateGatherPosts(int id, RelativeMemo model_new) {
- targetDao.updateRelativeMemo(gatherPostsTableName, model_new, id);// 更新数据relative_memos表
- }
-
- // 处理URL不存在的项目 插入gather_projects表
- public void handleInsertGatherProjects(GatherProject model, Configure conf) {
- try {
- targetDao.insertOpenSourceProject(conf.getTargetTableName(),
- conf.getTargetFields(), model);
- } catch (Exception e) {
- logger.error(e);
- }
-
- }
-
- // 处理URL相同的项目更新 id表示更新的项目固定id
- public void handleUpdateGatherProjects(int id, GatherProject model_new) {
- targetDao.updateOpenSourceProject(conf.getTargetTableName(), model_new,
- id);// 更新数据gather_projects表
- }
-
- // 处理URL不存在的项目 插入job_requirements表
- public void handleInsertGatherJobs(JobRequirement model, Configure conf) {
- try {
- targetDao.insertJobRequirement(conf.getTargetTableName(),
- conf.getTargetFields(), model);
- } catch (Exception e) {
- logger.error(e);
- }
-
- }
-
- // 处理URL相同的项目更新 id表示更新的项目固定id
- public void handleUpdateGatherJobs(int id, JobRequirement model_new) {
- targetDao
- .updateJobRequirement(conf.getTargetTableName(), model_new, id);// 更新数据job_requirements表
- }
-}
+package org.ossean.gather.process;
+
+import java.util.List;
+
+import javax.annotation.Resource;
+
+import org.apache.log4j.Logger;
+import org.ossean.gather.model.Configure;
+import org.ossean.gather.model.GatherProject;
+import org.ossean.gather.model.JobRequirement;
+import org.ossean.gather.model.PKControlPosts;
+import org.ossean.gather.model.PKControlProjects;
+import org.ossean.gather.model.RelativeMemo;
+import org.ossean.gather.model.Taggings;
+import org.ossean.gather.sourceDao.GatherDao;
+import org.ossean.gather.sourceDao.PKControlPostsDao;
+import org.ossean.gather.sourceDao.PKControlProjectsDao;
+import org.ossean.gather.targetDao.PointerDao;
+import org.ossean.gather.targetDao.TargetDao;
+import org.springframework.context.annotation.Scope;
+import org.springframework.stereotype.Component;
+
+@Component("gatherThread")
+@Scope("prototype")
+public class GatherThread implements Runnable {
+ private static Logger logger = Logger.getLogger(GatherThread.class);
+ private Configure conf;
+
+ @Resource
+ private GatherDao gatherDao;
+ @Resource
+ private PointerDao pointerDao;
+ @Resource
+ private PKControlPostsDao pkControlPostsDao;
+ @Resource
+ private TargetDao targetDao;
+ @Resource
+ private PKControlProjectsDao pkControlProjectsDao;
+
+ private int idsBegin; // first id of the range still to be transferred
+ private int idsEnd; // last id of the range to be transferred
+ private int idsIncrement;// number of ids transferred per batch
+
+ private int beginId;
+ private int endId;
+
+ private String sourceTableName;
+ private String pkControlPostsTableName = "pk_control_posts";
+ private String pkControlProjectsTableName = "pk_control_projects";
+ private String memoTaggingsTableName = "memo_taggings";
+ private String tagsTableName = "tags";
+
+ private String gatherPostsTableName = "relative_memos";
+ private int maxId;
+
+ public void setParameters(Configure conf, String sourceTableName) { // store the configuration and source table name that run() reads
+ this.sourceTableName = sourceTableName;
+ this.conf = conf;
+ }
+
+ // Read the saved transfer pointer for (source -> target) from the given pointer table.
+ // If the lookup throws, a pointer row with value 1 is inserted and 1 is returned.
+ public int readPointer(String table, String source, String target) {
+ try {
+ return pointerDao.getPointer(table, source, target);
+ } catch (Exception e) {
+ // Treated as "no pointer row yet" (any exception from the lookup lands here)
+ logger.info("No such pointer! Create one");
+ pointerDao.insertPointer(table, source, target, 1);
+ return 1;
+ }
+ }
+
+ @Override
+ public void run() { // Transfers every id from the saved pointer up to the current max id, one batch at a time
+// long start = System.currentTimeMillis();
+ Thread.currentThread().setName(sourceTableName);
+ idsIncrement = conf.getIdsIncrement();
+ idsBegin = readPointer(conf.getPointerTableName(), sourceTableName,
+ conf.getTargetTableName());
+ idsEnd = maxId = gatherDao.getMaxId(sourceTableName);
+ while (idsBegin <= idsEnd) { // inclusive: handleBatchData stores endId + 1 as the pointer, so pointer == maxId is still pending
+ beginId = idsBegin;
+ endId = beginId + idsIncrement - 1; // both bounds are inclusive when the rows are fetched
+ if (endId <= maxId) {
+ handleBatchData(beginId, endId, conf);
+ idsBegin = idsBegin + idsIncrement;
+ } else {
+ endId = maxId; // clamp the final partial batch to maxId
+ handleBatchData(beginId, endId, conf);
+ break;
+ }
+ }
+ GatherProcess.gatherState.put(sourceTableName, false);
+// long end = System.currentTimeMillis();
+// logger.info((end - start) / 6000);
+ }
+
+ public void handleBatchData(int beginId, int endId, Configure conf) { // Transfers source rows with ids in [beginId, endId] into the configured target table, then advances the pointer
+ logger.info("BeginId#" + sourceTableName + ":" + beginId);
+ // Re-read the current max id of the source table
+ int maxId = gatherDao.getMaxId(sourceTableName);
+ // Never transfer ids beyond the current max id
+ if (beginId >= 0 && endId > 0 && maxId >= endId) {
+ // Log the start of this batch
+ logger.info("begin gathering...");
+ // Transfer the rows of this id range; rows whose url_md5 is already known are updated instead of inserted
+ try {
+ String[] sourceFields = conf.getSourceFields().split(",");
+ String[] targetFields = conf.getTargetFields().split(",");
+ String selectItems = "";
+ for (int i = 0; i < sourceFields.length; i++) {
+ String str_source = sourceFields[i];
+ String str_target = targetFields[i];
+ selectItems += str_source + " as " + str_target + ",";
+ }
+ selectItems = selectItems
+ .substring(0, selectItems.length() - 1) + " ";
+ if (conf.getTargetTableName().equals("relative_memos")) {
+ List<RelativeMemo> dataGet = gatherDao.getPostGatherData(
+ sourceTableName, selectItems, beginId, endId,
+ conf.getAndWhere());
+ for (int i = 0; i < dataGet.size(); i++) {
+ RelativeMemo model = dataGet.get(i);
+ String urlMD5 = model.getUrl_md5();// url_md5 decides whether this post already exists
+ // Update or insert?
+ int postId = 0;
+ if(GatherProcess.urlMd5Set.contains(urlMD5)){
+ // url_md5 already known: update
+ RelativeMemo samePost = targetDao.findPostByUrlMD5(
+ conf.getTargetTableName(), urlMD5);
+ // Update the matching record in the target table
+ postId = samePost.getId();
+ model.setId(postId);
+ handleUpdateGatherPosts(samePost.getId(), model);
+ }else{
+ // Not present: insert
+ PKControlPosts pkControlModel = pkControlPostsDao
+ .selectItemByUrlMD5(
+ pkControlPostsTableName, urlMD5);// check whether a fixed id already exists
+ if (pkControlModel != null)
+ model.setId(pkControlModel.getId());
+ else {
+ // Generate an id for this item in pk_control_posts
+ pkControlPostsDao.insertOneItem(
+ pkControlPostsTableName, urlMD5);
+ // Read back the id that was just inserted
+ PKControlPosts controlItem = pkControlPostsDao
+ .selectItemByUrlMD5(
+ pkControlPostsTableName, urlMD5);
+ // Use it as the model's permanent id
+ model.setId(controlItem.getId());
+ }
+ postId = model.getId(); // set on both paths; previously stayed 0 when the fixed id already existed, so taggings got taggable_id 0
+ handleInsertGatherPosts(model, conf);
+ GatherProcess.urlMd5Set.add(urlMD5);
+ }
+
+ // Split the tag string and store one tagging row per tag
+ String tags = model.getTags();
+ if (tags == null) {
+ // This item has no tags
+ continue;
+ }
+ List<String> tagList = DataHandler
+ .tagsSegmentation(tags);
+ for (String tag : tagList) {
+ targetDao.insertTag(tagsTableName, tag);// original note: the tag is inserted in "ignore" mode
+ int tag_id = targetDao.selectTagIdByName(
+ tagsTableName, tag);
+ Taggings taggings = new Taggings();
+ taggings.setTag_id(tag_id);
+ taggings.setTaggable_id(postId);
+ taggings.setTaggable_type("RelativeMemo");
+ taggings.setContext("tags");
+ taggings.setCreated_at(DataHandler.getNow());
+ // Persist the Taggings row
+ try {
+ targetDao.insertTaggings(memoTaggingsTableName,
+ taggings);
+ } catch (Exception e) {
+ // Original note: the relative_memos record was deleted before this row could be inserted
+ logger.error("Failed to insert tagging (tag_id=" + tag_id + ", taggable_id=" + postId + ")", e);
+ System.exit(0); // NOTE(review): exits with status 0 on a failure path - confirm this is intended
+ }
+ }
+ }
+ } else if (conf.getTargetTableName().equals("gather_projects")) {
+ List<GatherProject> dataGet = gatherDao.getPrjGatherData(
+ sourceTableName, selectItems, beginId, endId,
+ conf.getAndWhere());
+ for (int i = 0; i < dataGet.size(); i++) {
+ GatherProject model = dataGet.get(i);
+ String urlMD5 = model.getUrl_md5();// url_md5 decides whether this project already exists
+ // Update or insert?
+ int prjId = 0;
+ if(GatherProcess.urlMd5Set.contains(urlMD5)){
+ GatherProject samePrj = targetDao.findPrjByUrlMD5(
+ conf.getTargetTableName(), urlMD5);
+ // Update the matching record in gather_projects
+ prjId = samePrj.getId();
+ model.setId(prjId);
+ model.setUpdate_mark(2);
+ handleUpdateGatherProjects(samePrj.getId(), model);
+ }else{
+ // Not present: insert
+ PKControlProjects pkControlProjects = pkControlProjectsDao
+ .selectItemByUrlMD5(
+ pkControlProjectsTableName, urlMD5);// check whether a fixed id already exists
+ if (pkControlProjects != null)
+ model.setId(pkControlProjects.getId());
+ else {
+ // Generate an id for this item in pk_control_projects
+ pkControlProjectsDao.insertOneItem(
+ pkControlProjectsTableName, urlMD5);
+ // Read back the id that was just inserted
+ PKControlProjects controlItem = pkControlProjectsDao
+ .selectItemByUrlMD5(
+ pkControlProjectsTableName,
+ urlMD5);
+ // Use it as the model's permanent id
+ model.setId(controlItem.getId());
+ prjId = model.getId();
+ }
+ model.setUpdate_mark(0);
+ handleInsertGatherProjects(model, conf);
+ GatherProcess.urlMd5Set.add(urlMD5);
+ }
+
+// // Split the tag string and store the tag/project relations (disabled)
+// String tags = model.getTags();
+// if (tags == null) {
+// // This project has no tags
+// continue;
+// }
+// List tagList = DataHandler
+// .tagsSegmentation(tags);
+// for (String tag : tagList) {
+// targetDao.insertTag(tagsTableName, tag);// insert the tag in "ignore" mode
+// int tag_id = targetDao.selectTagIdByName(
+// tagsTableName, tag);
+// Taggings taggings = new Taggings();
+// taggings.setTag_id(tag_id);
+// taggings.setTaggable_id(prjId);
+// taggings.setTaggable_type("OpenSourceProject");
+// taggings.setContext("tags");
+// taggings.setCreated_at(DataHandler.getNow());
+// // Persist the Taggings row
+// try {
+// targetDao.insertTaggings(taggingsTableName,
+// taggings);
+// } catch (Exception e) {
+// // The record was deleted before this row could be inserted
+// logger.error(e);
+// System.exit(0);
+// }
+// }
+ }
+ } else {
+ List<JobRequirement> dataGet = gatherDao.getJobGatherData(
+ sourceTableName, selectItems, beginId, endId,
+ conf.getAndWhere());
+ for (int i = 0; i < dataGet.size(); i++) {
+ JobRequirement model = dataGet.get(i);
+ String urlMD5 = model.getUrl_md5();// url_md5 decides whether this job already exists
+ // Update or insert?
+ int postId = 0;
+ if(GatherProcess.urlMd5Set.contains(urlMD5)){
+ JobRequirement sameJob = targetDao.findJobByUrlMD5(
+ conf.getTargetTableName(), urlMD5);
+ // Update the matching record in the target table
+ postId = sameJob.getId();
+ model.setId(postId);
+ handleUpdateGatherJobs(sameJob.getId(), model);
+ }else{
+ // Not present: insert
+ PKControlPosts pkControlPosts = pkControlPostsDao
+ .selectItemByUrlMD5(
+ pkControlPostsTableName, urlMD5);// check whether a fixed id already exists
+ if (pkControlPosts != null)
+ model.setId(pkControlPosts.getId());
+ else {
+ // Generate an id for this item in pk_control_posts
+ pkControlPostsDao.insertOneItem(
+ pkControlPostsTableName, urlMD5);
+ // Read back the id that was just inserted
+ PKControlPosts controlItem = pkControlPostsDao
+ .selectItemByUrlMD5(
+ pkControlPostsTableName, urlMD5);
+ // Use it as the model's permanent id
+ model.setId(controlItem.getId());
+ postId = model.getId();
+ }
+ handleInsertGatherJobs(model, conf);
+ GatherProcess.urlMd5Set.add(urlMD5);
+ }
+ }
+ }
+
+ } catch (Exception ex) {
+ // Anything thrown during the transfer aborts the process before the pointer is advanced
+ logger.error("Batch transfer failed for " + sourceTableName + " ids " + beginId + "-" + endId, ex);
+ System.exit(0); // NOTE(review): exits with status 0 on a failure path - confirm this is intended
+ }
+
+ // Advance the pointer to endId + 1
+ pointerDao.updatePointer(conf.getPointerTableName(),
+ sourceTableName, conf.getTargetTableName(), endId + 1);// sourceIdBegin
+ // +
+ // idsIncrement
+ logger.info("current--" + sourceTableName + ": " + endId);
+ }
+ }
+
+ // 处理URL不存在的帖子 插入relative_memos表
+ public void handleInsertGatherPosts(RelativeMemo model, Configure conf) {
+ try {
+ targetDao.insertRelativeMemo(conf.getTargetTableName(),
+ conf.getTargetFields(), model);
+ } catch (Exception e) {
+ logger.error(e);
+ }
+ }
+
+ // Update an existing post with the same URL; id is the fixed id of the post being updated
+ public void handleUpdateGatherPosts(int id, RelativeMemo model_new) {
+ targetDao.updateRelativeMemo(gatherPostsTableName, model_new, id);// update the row in the relative_memos table
+ }
+
+ // 处理URL不存在的项目 插入gather_projects表
+ public void handleInsertGatherProjects(GatherProject model, Configure conf) {
+ try {
+ targetDao.insertOpenSourceProject(conf.getTargetTableName(),
+ conf.getTargetFields(), model);
+ } catch (Exception e) {
+ logger.error(e);
+ }
+
+ }
+
+ // 处理URL相同的项目更新 id表示更新的项目固定id
+ public void handleUpdateGatherProjects(int id, GatherProject model_new) {
+ targetDao.updateOpenSourceProject(conf.getTargetTableName(), model_new,
+ id);// 更新数据gather_projects表
+ }
+
+ // 处理URL不存在的项目 插入job_requirements表
+ public void handleInsertGatherJobs(JobRequirement model, Configure conf) {
+ try {
+ targetDao.insertJobRequirement(conf.getTargetTableName(),
+ conf.getTargetFields(), model);
+ } catch (Exception e) {
+ logger.error(e);
+ }
+
+ }
+
+ // 处理URL相同的项目更新 id表示更新的项目固定id
+ public void handleUpdateGatherJobs(int id, JobRequirement model_new) {
+ targetDao
+ .updateJobRequirement(conf.getTargetTableName(), model_new, id);// 更新数据job_requirements表
+ }
+}
diff --git a/match_program/bin/resources/applicationContext_mybatis.xml b/match_program/bin/resources/applicationContext_mybatis.xml
index 04ac8c6..65e88cf 100644
--- a/match_program/bin/resources/applicationContext_mybatis.xml
+++ b/match_program/bin/resources/applicationContext_mybatis.xml
@@ -36,9 +36,9 @@
destroy-method="close">
+ value="jdbc:mysql://172.16.128.36:3306/ossean_production?characterEncoding=UTF-8&zeroDateTimeBehavior=convertToNull&autoReconnect=true" />
-
+
diff --git a/match_program/src/main/java/com/ossean/match/lucene/LuceneIndex.java b/match_program/src/main/java/com/ossean/match/lucene/LuceneIndex.java
index e8aa53c..d4c27f9 100644
--- a/match_program/src/main/java/com/ossean/match/lucene/LuceneIndex.java
+++ b/match_program/src/main/java/com/ossean/match/lucene/LuceneIndex.java
@@ -17,7 +17,6 @@ import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
-import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
@@ -36,11 +35,9 @@ import org.wltea.analyzer.lucene.IKAnalyzer;
import com.ossean.match.dao.ProjectDao;
import com.ossean.match.dao.RelativeMemoDao;
-import com.ossean.match.matchprocess.CountFrequency;
import com.ossean.match.matchprocess.MatchIncrement;
import com.ossean.match.model.Project;
import com.ossean.match.model.RelativeMemo;
-import com.ossean.match.model.Tag;
import com.ossean.match.utils.Normalizer;
@Component("luceneindex")
@@ -84,36 +81,6 @@ public class LuceneIndex {
iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
IndexWriter iw = new IndexWriter(dire, iwc);
return iw;
- }
-
- public static void buildTagIndex(List tags) {
- Logger logger1 = LoggerFactory.getLogger(LuceneIndex.class);
- Directory dir;
- Analyzer analyzer = new IKAnalyzer(true);
- try {
- dir = FSDirectory.open(Paths.get(CountFrequency.tagIndexDir));
- IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
- iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
- IndexWriter writer = new IndexWriter(dir, iwc);
- for (Tag tag : tags) {
- // 对标签建立索引
- Document doc = new Document();
- String name = tag.getName().toLowerCase();
- // 标签原始名字进行存储
- doc.add(new StringField("name", "" + name, Field.Store.YES));
- doc.add(new StringField("id", "" + tag.getId(), Field.Store.YES));
- // 对标签进行分词
- doc.add(new TextField("items", Normalizer.normalize(name),
- Field.Store.NO));
- writer.addDocument(doc);
- }
- writer.close();
-
- } catch (IOException e) {
- logger1.error("buildTagIndex IOException: " + e);
- }
- // Analyzer analyzer = new SimpleAnalyzer();
-
}
/**
@@ -172,9 +139,6 @@ public class LuceneIndex {
endId += step;
}
}
- matchIncrement.setPrjHistory(0);
- matchIncrement.setPrjIndexed(maxPrjId);
- matchIncrement.writeIntoRecord();
writer.commit();
writer.close();
}
@@ -225,9 +189,7 @@ public class LuceneIndex {
endId += step;
}
}
- matchIncrement.setPrjHistory(0);
matchIncrement.setMemoHistory(lastMemoId);
- matchIncrement.setMemoIndexed(lastMemoId);
matchIncrement.writeIntoRecord();
/*FileOutputStream fout = new FileOutputStream(new File("record.txt"));
fout.write((0 + "\t" + lastMemoId + "\t" + 0 + "\t" + 0).getBytes()); //初始化record.txt,项目初始id为0,帖子初始id为创建索引的最后一个帖子的id
@@ -256,7 +218,7 @@ public class LuceneIndex {
continue;
}
Document doc = new Document();
- doc.add(new StringField(memoIdFieldName, String.valueOf(rsId), Store.YES));
+ doc.add(new StringField(memoIdFieldName, String.valueOf(rsId) + "", Store.YES));
doc.add(new TextField(titleFieldName, memo.getTitle(), Store.NO));
String memoTagsString = memo.getTags();
if (memoTagsString == null) {
@@ -266,7 +228,7 @@ public class LuceneIndex {
for(String memoTag : memoTagsList){
doc.add(new StringField(memoTagsFieldName, memoTag, Store.NO));
}
- indexWriter.addDocument(doc);
+ indexWriter.updateDocument(new Term(memoIdFieldName, String.valueOf(rsId) + ""), doc);
}
indexWriter.commit();
indexWriter.close();
diff --git a/match_program/src/main/java/com/ossean/match/lucene/LuceneSearch.java b/match_program/src/main/java/com/ossean/match/lucene/LuceneSearch.java
index df043b6..66794e1 100644
--- a/match_program/src/main/java/com/ossean/match/lucene/LuceneSearch.java
+++ b/match_program/src/main/java/com/ossean/match/lucene/LuceneSearch.java
@@ -7,11 +7,8 @@ import java.util.List;
import javax.annotation.Resource;
import org.apache.lucene.document.Document;
-import org.apache.lucene.index.FieldInvertState;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
-import org.apache.lucene.queryparser.classic.ParseException;
-import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Explanation;
@@ -24,10 +21,8 @@ import org.apache.lucene.search.similarities.DefaultSimilarity;
import org.apache.lucene.search.similarities.Similarity;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
-import org.wltea.analyzer.lucene.IKAnalyzer;
import com.ossean.match.dao.ProjectDao;
-import com.ossean.match.model.Project;
import com.ossean.match.utils.Normalizer;
public class LuceneSearch {
@@ -89,7 +84,7 @@ public class LuceneSearch {
}
query.add(tq, BooleanClause.Occur.SHOULD);
}
- TopDocs td = is.search(query, 3);
+ TopDocs td = is.search(query, 10000);
ScoreDoc[] sds = td.scoreDocs;
for (ScoreDoc sd : sds) {
Document d = is.doc(sd.doc);
@@ -98,10 +93,6 @@ public class LuceneSearch {
for(String prjName : prjNames){
if (keyWords.contains(prjName)) {
int pId = Integer.parseInt(prjId);
- Project currentPrj = projectDao.getPrjById(pId);
- if(currentPrj.getFiltration()==0){
-
- }
if (matchMap.containsKey(pId)) {
matchMap.put(pId, matchMap.get(pId) + weight + sd.score/1000);
} else
@@ -130,59 +121,45 @@ public class LuceneSearch {
*/
public static HashMap prjToMemoMatchByLucene(
String prjName, String searchField, double weight,
- HashMap map, IndexReader memoIndexReader, IndexReader prjIndexReader) {
+ HashMap map, IndexReader memoIndexReader) {
try {
IndexSearcher is = new IndexSearcher(memoIndexReader);
List prjNameList = Normalizer.getList(prjName);
BooleanQuery query = new BooleanQuery();
-// Similarity similarity = new DefaultSimilarity(){
-// @Override
-// public float queryNorm(float sumOfSquaredWeights) {
-// return 1.0f;
-// }
-// @Override
-// public float lengthNorm(FieldInvertState state) {
-// return 1.0f;
-// }
-// };
-// is.setSimilarity(similarity);
for(String prjNameTerm : prjNameList){
- //Term termForFreq = new Term(LuceneIndex.prjNameFieldName, prjNameTerm);
Term term = new Term(searchField, prjNameTerm);
TermQuery tq = new TermQuery(term);
-// double curTermDocFreq = prjIndexReader.docFreq(termForFreq);
-// if (prjNameList.size() > 1 && curTermDocFreq > 10) {
-// tq.setBoost((float) (1/(curTermDocFreq/10)));
-// }
-// else {
-// tq.setBoost(1.1f);
-// }
- query.add(tq, BooleanClause.Occur.MUST);
+ query.add(tq, BooleanClause.Occur.MUST); //项目名分词后的每个term都必须在帖子中出现
}
TopDocs td = is.search(query, 1000000);
ScoreDoc[] sds = td.scoreDocs;
for (ScoreDoc sd : sds) {
Document d = is.doc(sd.doc);
-// if (sd.score >= 0.7) {
String postId = d.get(LuceneIndex.memoIdFieldName);
int pId = Integer.parseInt(postId);
if (map.containsKey(pId)) {
map.put(pId, map.get(pId) + weight + sd.score/1000);
} else
map.put(pId, weight + sd.score/1000);
-// }
}
} catch (IOException e) {
logger.error("prjToMemoMatchByLucene IOException: " + e);
- } /*catch (ParseException e) {
- logger.error("prjToMemoMatchByLucene ParseException: " + e);
- }*/
+ }
return map;
}
- public static HashMap searchMemoTags(String tagStr, String searchField, double weight,
+ /**
+ * 项目名和项目别名与帖子标签之间的匹配
+ * @param tagStr
+ * @param searchField
+ * @param weight
+ * @param map
+ * @param indexReader
+ * @return
+ */
+ public static HashMap searchMemoTags(String tagStr, String searchField, String idField, double weight,
HashMap map, IndexReader indexReader) {
try {
@@ -193,11 +170,11 @@ public class LuceneSearch {
ScoreDoc[] sds = td.scoreDocs;
for (ScoreDoc sd : sds) {
Document d = is.doc(sd.doc);
- String postId = d.get(LuceneIndex.memoIdFieldName);
+ String postId = d.get(idField);
int pId = Integer.parseInt(postId);
- if (map.containsKey(pId)) {
+ if (map.containsKey(pId)) {
map.put(pId, map.get(pId) + weight);
- } else {
+ } else {
map.put(pId, weight);
}
}
@@ -207,6 +184,16 @@ public class LuceneSearch {
return map;
}
+ /**
+ * 项目标签与帖子标签
+ * @param tagStr
+ * @param searchField
+ * @param idField
+ * @param weight
+ * @param map
+ * @param indexReader
+ * @return
+ */
public static HashMap searchByPrjTag(String tagStr, String searchField, String idField, double weight,
HashMap map, IndexReader indexReader) {
@@ -220,7 +207,7 @@ public class LuceneSearch {
Document d = is.doc(sd.doc);
String postId = d.get(idField);
int pId = Integer.parseInt(postId);
- if (map.containsKey(pId)) {
+ if (map.containsKey(pId)) { //有项目名和项目别名的匹配时才加入标签匹配的结果
map.put(pId, map.get(pId) + weight);
}
}
@@ -230,6 +217,16 @@ public class LuceneSearch {
return map;
}
+ /**
+ * 项目标签搜索帖子标题
+ * @param tagStr
+ * @param searchField
+ * @param idField
+ * @param weight
+ * @param map
+ * @param indexReader
+ * @return
+ */
public static HashMap searchByPrjTagInMemoTitle(String tagStr, String searchField, String idField, double weight,
HashMap map, IndexReader indexReader) {
@@ -260,34 +257,34 @@ public class LuceneSearch {
// 获得每个帖子匹配到的标签个数
- public static HashMap tagsMatch(String idField,
- String tags, String searchField, IndexReader indexReader) {
- HashMap tagsMatchNum = new HashMap();
- try {
- IndexSearcher is = new IndexSearcher(indexReader);
- QueryParser parser = new QueryParser(searchField, new IKAnalyzer(true));
- Query query = parser.parse(tags);
- TopDocs td = is.search(query, 100000);
- ScoreDoc[] sds = td.scoreDocs;
- for (ScoreDoc sd : sds) {
- Document d = is.doc(sd.doc);
- String postId = d.get(idField);
- int pId = Integer.parseInt(postId);
- Explanation explanation = is.explain(query, sd.doc);
- int hitNum = getHitTermsNum(explanation);
- if (tagsMatchNum.containsKey(pId)) {
- tagsMatchNum.put(pId, tagsMatchNum.get(pId) + hitNum);
- } else
- tagsMatchNum.put(pId, hitNum);
- }
- } catch (IOException e) {
- logger.error("tagsMatch IOException: " + e);
- } catch (ParseException e) {
- logger.error("tagsMatch ParseException: " + e);
- }
-
- return tagsMatchNum;
- }
+// public static HashMap tagsMatch(String idField,
+// String tags, String searchField, IndexReader indexReader) {
+// HashMap tagsMatchNum = new HashMap();
+// try {
+// IndexSearcher is = new IndexSearcher(indexReader);
+// QueryParser parser = new QueryParser(searchField, new IKAnalyzer(true));
+// Query query = parser.parse(tags);
+// TopDocs td = is.search(query, 100000);
+// ScoreDoc[] sds = td.scoreDocs;
+// for (ScoreDoc sd : sds) {
+// Document d = is.doc(sd.doc);
+// String postId = d.get(idField);
+// int pId = Integer.parseInt(postId);
+// Explanation explanation = is.explain(query, sd.doc);
+// int hitNum = getHitTermsNum(explanation);
+// if (tagsMatchNum.containsKey(pId)) {
+// tagsMatchNum.put(pId, tagsMatchNum.get(pId) + hitNum);
+// } else
+// tagsMatchNum.put(pId, hitNum);
+// }
+// } catch (IOException e) {
+// logger.error("tagsMatch IOException: " + e);
+// } catch (ParseException e) {
+// logger.error("tagsMatch ParseException: " + e);
+// }
+//
+// return tagsMatchNum;
+// }
//get the number of terms hitted in docs
public static int getHitTermsNum(Explanation explanation){
diff --git a/match_program/src/main/java/com/ossean/match/main/Main.java b/match_program/src/main/java/com/ossean/match/main/Main.java
index 18557c2..f2d53df 100644
--- a/match_program/src/main/java/com/ossean/match/main/Main.java
+++ b/match_program/src/main/java/com/ossean/match/main/Main.java
@@ -9,7 +9,6 @@ import org.springframework.context.support.ClassPathXmlApplicationContext;
import org.springframework.stereotype.Component;
import com.ossean.match.lucene.LuceneIndex;
-import com.ossean.match.matchprocess.CountFrequency;
import com.ossean.match.matchprocess.MatchIncrement;
import com.ossean.match.matchprocess.MemoToPrjMatch;
import com.ossean.match.matchprocess.NewPrjMonitor;
@@ -29,10 +28,6 @@ public class Main {
@Autowired
private PrjToMemoMatch prjToMemoMatch;
- @Qualifier("countfrequency")
- @Autowired
- private CountFrequency countFrequency;
-
@Qualifier("luceneindex")
@Autowired
private LuceneIndex luceneIndex;
@@ -49,31 +44,17 @@ public class Main {
public void start() throws InterruptedException, ParseException {
while (true) {
MatchIncrement matchIncrement = new MatchIncrement();
- if (matchIncrement.getTaskToPrjId() != 0) { //判断是否设置了截止项目id
- prjToMemoMatch.setMatchIncrement(matchIncrement);
- countFrequency.setMatchIncrement(matchIncrement);
- if (matchIncrement.getPrjHistory() == 0 && matchIncrement.getMemoHistory() == 0) {
- luceneIndex.run(matchIncrement);
- }
- newPrjMonitor.run();
- countFrequency.run();
- prjToMemoMatch.run();
- }
- else {
- newPrjMonitor.setMatchIncrement(matchIncrement);
- memoToPrjMatch.setMatchIncrement(matchIncrement);
- prjToMemoMatch.setMatchIncrement(matchIncrement);
- countFrequency.setMatchIncrement(matchIncrement);
-
- if (matchIncrement.getPrjHistory() == 0 && matchIncrement.getMemoHistory() == 0) {
- luceneIndex.run(matchIncrement);
- }
-
- newPrjMonitor.run();
- countFrequency.run();
- prjToMemoMatch.run();
- memoToPrjMatch.run();
+ newPrjMonitor.setMatchIncrement(matchIncrement);
+ memoToPrjMatch.setMatchIncrement(matchIncrement);
+ prjToMemoMatch.setMatchIncrement(matchIncrement);
+
+ if (matchIncrement.getMemoHistory() == 0) {
+ luceneIndex.run(matchIncrement);
}
+
+ newPrjMonitor.run();
+ prjToMemoMatch.run();
+ memoToPrjMatch.run();
if (matchIncrement.getSleepTime() > 0) {
logger.info(".........sleeping.........." + matchIncrement.getSleepTime()/1000 + "s......");
}
diff --git a/match_program/src/main/java/com/ossean/match/matchprocess/CountFrequency.java b/match_program/src/main/java/com/ossean/match/matchprocess/CountFrequency.java
deleted file mode 100644
index 683347f..0000000
--- a/match_program/src/main/java/com/ossean/match/matchprocess/CountFrequency.java
+++ /dev/null
@@ -1,364 +0,0 @@
-package com.ossean.match.matchprocess;
-
-import java.io.IOException;
-import java.nio.file.Paths;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-
-import javax.annotation.Resource;
-
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.index.DirectoryReader;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.queryparser.classic.ParseException;
-import org.apache.lucene.queryparser.classic.QueryParser;
-import org.apache.lucene.search.IndexSearcher;
-import org.apache.lucene.search.Query;
-import org.apache.lucene.search.ScoreDoc;
-import org.apache.lucene.search.TopDocs;
-import org.apache.lucene.store.Directory;
-import org.apache.lucene.store.FSDirectory;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import org.springframework.stereotype.Component;
-import org.wltea.analyzer.lucene.IKAnalyzer;
-
-import com.ossean.match.dao.AtomicItemDao;
-import com.ossean.match.dao.ProjectDao;
-import com.ossean.match.dao.TagDao;
-import com.ossean.match.lucene.LuceneIndex;
-import com.ossean.match.model.Atom;
-import com.ossean.match.model.Project;
-import com.ossean.match.model.Tag;
-import com.ossean.match.pipeline.AtomicItemPipeline;
-import com.ossean.match.pipeline.ResultPipeline;
-import com.ossean.match.utils.Extractor;
-import com.ossean.match.utils.Normalizer;
-import com.ossean.match.utils.SimilarityCounter;
-
-@Component("countfrequency")
-public class CountFrequency {
-
- @Resource
- private AtomicItemPipeline atomicItemPipeline;
- @Resource
- private AtomicItemDao atomicItemDao;
- @Resource
- private ProjectDao projectDao;
- @Resource
- private TagDao tagDao;
- @Resource
- private ResultPipeline resultPipeline;
-
- public static List atomList;
- // = atomicItemDao.selectAllAtom();
-
- public static Map> atoms;
- // = transformAtoms(atomList);
-
- public static String tagIndexDir = "tagIndexDir";
-
- private Logger logger = LoggerFactory.getLogger(getClass());
- MatchIncrement matchIncrement = null;
-
- public void setMatchIncrement(MatchIncrement matchIncrement) {
- this.matchIncrement = matchIncrement;
- }
-
- private static Map> transformAtoms(
- List atomList) {
- // TODO Auto-generated method stub
- ArrayList nums = null;
- Map> atoms = new HashMap>();
- String name = null;
- for (Atom atom : atomList) {
- nums = new ArrayList(2);
- nums.add(atom.getNumTag());
- nums.add(atom.getNumInProj());
- nums.add(atom.getStatus());
- name = atom.getName();
- atoms.put(name, nums);
- }
- return atoms;
- }
-
- private List retransformAtoms(Map> atoms) {
- List atomList = new ArrayList();
- Set items = atoms.keySet();
- for (String item : items) {
- Atom atom = new Atom();
- atom.setName(item);
- atom.setNumInTag(atoms.get(item).get(0));
- atom.setNumInProj(atoms.get(item).get(1));
- atom.setStatus(atoms.get(item).get(2));
- atomList.add(atom);
- }
- return atomList;
- }
-
- private void extractTags(List tags,
- Map> atoms) {
- for (Tag tag : tags) {
- List items = Extractor.extractAtoms(tag.getName());
- logger.info("extract tag : " + tag.getId() + ","
- + tag.getName() + ">>" + items);
- saveAtoms(items, atoms, 0);
-
- }
- }
-
- private void extractProjects(List projects,
- Map> atoms) {
- for (Project project : projects) {
- List items = Extractor.extractAtoms(project.getName());
- logger.info("extract project : " + project.getId() + ","
- + project.getName() + ">>" + items);
- saveAtoms(items, atoms, 1);
- }
- }
-
- /**
- * @param items 抽取元素
- * @param atoms 存储抽取到的元素
- * @param pos 用来标注项目和标签,1为项目,0为标签
- */
- private void saveAtoms(List items,
- Map> atoms, int pos) {
-
- ArrayList nums = null;
- for (String item : items) {
- if (atoms.containsKey(item)) {
- nums = atoms.get(item);
- if(nums.get(2) == 0) {
- nums.set(2, 1);
- }
- } else {
- nums = new ArrayList(2);
- nums.add(0);
- nums.add(0);
- nums.add(2);
- }
- nums.set(pos, nums.get(pos) + 1);
- atoms.put(item, nums);
- }
- }
-
- // 对项目(projName)和标签进行匹配,并将匹配结果存入数据库
- private List queryMatch(String projName, int projId, Map> atoms)
- throws ParseException {
- Directory dir;
- Analyzer analyzer = new IKAnalyzer(true);
- List resultStrings = new ArrayList();
- try {
- dir = FSDirectory.open(Paths.get(CountFrequency.tagIndexDir));
- IndexReader reader = DirectoryReader.open(dir);
- IndexSearcher searcher = new IndexSearcher(reader);
- QueryParser parser = new QueryParser("items", analyzer);
- Query query = null;
- if (projName != null && !"".equals(Normalizer.normalize(projName))) {
- query = parser.parse(QueryParser.escape(Normalizer
- .normalize(projName)));
-
- TopDocs hits = searcher.search(query, 1000);
- if (hits.scoreDocs != null) {
- for (ScoreDoc sc : hits.scoreDocs) {
- int docNum = sc.doc;
- int tagId = Integer.parseInt(searcher.doc(docNum).get(
- "id"));
- String tagName = searcher.doc(docNum).get("name");
- // System.out.println("match: " + projId + " - " + tagId
- // +
- // "("
- // + tagName + ")");
- // System.out.println("completely matched tag >>> id: "
- // + tagId + ",name: " + tagOriginalName);
- List itemsOfPrj = Extractor
- .extractAtoms(projName);
- List itemsOfTag = Extractor
- .extractAtoms(tagName);
-
- int prjWeight[] = new int[itemsOfPrj.size()];
- int tagWeight[] = new int[itemsOfTag.size()];
-
- int i = 0;
- for (String item : itemsOfPrj) {
- prjWeight[i++] = atoms.get(item).get(1);
- }
- i = 0;
- for (String item : itemsOfTag) {
- tagWeight[i++] = atoms.get(item).get(0);
- }
-
- float score = SimilarityCounter.countSimilarity(
- itemsOfPrj, prjWeight, itemsOfTag, tagWeight);
- float EPSINON = 0.999F;
- if (!((score >= -EPSINON) && (score <= EPSINON))) {
- resultPipeline.insertResult3(projId, projName,
- tagId, tagName, score);
- resultStrings.add(tagName);
- }
- }
- }
- }
-
- } catch (IOException e) {
- e.printStackTrace();
- }
- return resultStrings;
- }
-
- // 对项目(projName)和新标签进行匹配,并将匹配结果存入数据库
- private List queryMatchNewTag(String tagName, int tagId, Map> atoms)
- throws ParseException {
- // Directory dir;
- tagName = tagName.trim();
- Analyzer analyzer = new IKAnalyzer(true);
- List resultStrings = new ArrayList();
- try {
- Directory dire = FSDirectory.open(Paths
- .get(LuceneIndex.PRJS_INDEX_PATH));
- IndexReader indexReader = DirectoryReader.open(dire);
-
- // dir = FSDirectory.open(Paths.get(CountFrequency.tagIndexDir));
- // IndexReader reader = DirectoryReader.open(dir);
- IndexSearcher searcher = new IndexSearcher(indexReader);
- QueryParser parser = new QueryParser("items", analyzer);
- Query query = null;
- if (tagName != null && !"".equals(Normalizer.normalize(tagName))) {
- query = parser.parse(QueryParser.escape(Normalizer
- .normalize(tagName)));
- TopDocs hits = searcher.search(query, 1000);
-
- if (hits.scoreDocs != null) {
- for (ScoreDoc sc : hits.scoreDocs) {
- int docNum = sc.doc;
- int projId = Integer.parseInt(searcher.doc(docNum).get(
- LuceneIndex.prjIdFieldName));
- String projName = searcher.doc(docNum).get(
- LuceneIndex.prjNameFieldName);
- // System.out.println("match: " + projId + " - " + tagId
- // +
- // "("
- // + tagName + ")");
- // System.out.println("completely matched tag >>> id: "
- // + tagId + ",name: " + tagOriginalName);
- List itemsOfPrj = Extractor
- .extractAtoms(projName);
- List itemsOfTag = Extractor
- .extractAtoms(tagName);
-
- int prjWeight[] = new int[itemsOfPrj.size()];
- int tagWeight[] = new int[itemsOfTag.size()];
-
- int i = 0;
- for (String item : itemsOfPrj) {
- prjWeight[i++] = atoms.get(item).get(1);
- }
- i = 0;
- for (String item : itemsOfTag) {
- tagWeight[i++] = atoms.get(item).get(0);
- }
-
- float score = SimilarityCounter.countSimilarity(
- itemsOfPrj, prjWeight, itemsOfTag, tagWeight);
- float EPSINON = 0.999F;
- if (!((score >= -EPSINON) && (score <= EPSINON))) {
- // TODO pipeline
- resultPipeline.insertResult3(projId, projName,
- tagId, tagName, score);
- resultStrings.add(projName);
- }
- }
- }
- }
-
- } catch (IOException e) {
- // TODO Auto-generated catch block
- e.printStackTrace();
- }
- return resultStrings;
- }
-
- public void run() throws ParseException {
- atomList = atomicItemDao.selectAllAtom();
- atoms = transformAtoms(atomList);
- if (matchIncrement.getPrePrjHistory() == 0
- && matchIncrement.getTagHistory() == 0) {
- List tags = tagDao.getBatchTag(matchIncrement.getTagHistory(),
- matchIncrement.getLastTagId());
- // 建立索引
- LuceneIndex.buildTagIndex(tags);
-
- List projects = projectDao.getBatchPrjsIncre(2000000);
-
- // 抽取项目
- extractProjects(projects, atoms);
- matchIncrement.getMatchRecord();
- matchIncrement.setPrePrjHistory(matchIncrement.getLastPrjId());
-
- // 抽取标签
- extractTags(tags, atoms);
- matchIncrement.setTagHistory(matchIncrement.getLastTagId());
-
-// atomList.clear();
-// atomList = retransformAtoms(atoms);
-// atomicItemPipeline.pipelineAtoms(atomList);
-
- // 新项目与标签匹配
- for (Project project : projects) {
- queryMatch(project.getName().trim(),
- project.getId(), atoms);
- }
- matchIncrement.writeIntoRecord();
- } else {
- if (matchIncrement.isNewPrePrjFlag()) {
- logger.info("start count frequency of projects match!!!");
- List projects = projectDao.getBatchPrjs(
- matchIncrement.getPrjHistory(),
- matchIncrement.getLastPrjId());
-
- // 抽取项目
- extractProjects(projects, atoms);
- matchIncrement.setPrePrjHistory(matchIncrement
- .getLastPrjId());
-
- // 新项目与标签匹配
- for (Project project : projects) {
- List tags = new ArrayList();
- tags = queryMatch(project.getName().trim(),
- project.getId(),atoms);
-
- }
- matchIncrement.writeIntoRecord();
- }
- if (matchIncrement.isNewTagflag()) {
- logger.info("start count frequency of projects match!!!");
-
- List tags = tagDao.getBatchTag(
- matchIncrement.getTagHistory(),
- matchIncrement.getLastTagId());
- // 建立索引
- LuceneIndex.buildTagIndex(tags);
- // 抽取项目
- extractTags(tags, atoms);
- matchIncrement.setTagHistory(matchIncrement.getLastTagId());
-
- // 新标签与项目匹配
- for (Tag tag : tags) {
- List projects = new ArrayList();
- projects = queryMatchNewTag(tag.getName().trim(),
- tag.getId(), atoms);
-
- }
- matchIncrement.writeIntoRecord();
- }
- }
- atomList.clear();
- atomList = retransformAtoms(atoms);
- atomicItemPipeline.pipelineAtoms(atomList);
- }
-
-}
diff --git a/match_program/src/main/java/com/ossean/match/matchprocess/Match.java b/match_program/src/main/java/com/ossean/match/matchprocess/Match.java
index 81daf4e..e229c98 100644
--- a/match_program/src/main/java/com/ossean/match/matchprocess/Match.java
+++ b/match_program/src/main/java/com/ossean/match/matchprocess/Match.java
@@ -5,7 +5,6 @@ import java.nio.file.Paths;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
-import java.util.Set;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
@@ -24,7 +23,6 @@ import com.ossean.match.lucene.LuceneSearch;
import com.ossean.match.model.Project;
import com.ossean.match.model.RelativeMemo;
import com.ossean.match.model.RelativeMemoToOpenSourceProject;
-import com.ossean.match.pipeline.ResultPipeline;
import com.ossean.match.utils.Normalizer;
import javax.annotation.Resource;
@@ -38,8 +36,6 @@ public class Match {
private double titleWeight = 0.8;
private double threshholdWeight = 1.3;
- @Resource
- private ResultPipeline resultPipeline;
@Resource
private RelativeMemoToOpenSourceProjectDao matchResultDao;
@Resource
@@ -59,14 +55,11 @@ public class Match {
try {
Directory memoDire = FSDirectory.open(Paths
.get(LuceneIndex.MEMOS_INDEX_PATH));
- Directory prjDire = FSDirectory.open(Paths
- .get(LuceneIndex.PRJS_INDEX_PATH));
if (!DirectoryReader.indexExists(memoDire)) { // if the index directory
// doesn't exist
return;
}
IndexReader memoIndexReader = DirectoryReader.open(memoDire);
- IndexReader prjIndexReader = DirectoryReader.open(prjDire);
for(Project project : projects) {
int prjId = 0;
String prjName = "";
@@ -78,11 +71,14 @@ public class Match {
if(project.getTags() != null)
prjTags = project.getTags().toLowerCase();
prjName = project.getName().toLowerCase();
- //long start1 = System.currentTimeMillis();
- // 项目名匹配帖子标题
if (prjName.length() >= 2 && !Normalizer.isAllNumber(prjName)) {
+ // 项目名匹配帖子标题
matchMap = LuceneSearch.prjToMemoMatchByLucene(prjName,
- LuceneIndex.titleFieldName, titleWeight, matchMap, memoIndexReader, prjIndexReader);
+ LuceneIndex.titleFieldName, titleWeight, matchMap, memoIndexReader);
+ // 项目名匹配帖子标签
+ matchMap = LuceneSearch.searchMemoTags(
+ prjName, LuceneIndex.memoTagsFieldName, LuceneIndex.memoIdFieldName, memoTagsWeight,
+ matchMap, memoIndexReader);
}
if (project.getSynonyms() != null) {
synonyms = project.getSynonyms().toLowerCase();
@@ -91,88 +87,39 @@ public class Match {
//项目别名匹配帖子标题 TODO
for (String synonym : synonymsList) {
matchMap = LuceneSearch.prjToMemoMatchByLucene(synonym,
- LuceneIndex.titleFieldName, titleWeight, matchMap, memoIndexReader, prjIndexReader);
+ LuceneIndex.titleFieldName, titleWeight, matchMap, memoIndexReader);
}
//项目别名匹配帖子标签 TODO
for (String synonym : synonymsList) {
matchMap = LuceneSearch.searchMemoTags(
- synonym, LuceneIndex.memoTagsFieldName, memoTagsWeight,
+ synonym, LuceneIndex.memoTagsFieldName, LuceneIndex.memoIdFieldName, memoTagsWeight,
matchMap, memoIndexReader);
}
}
}
- //long end1 = System.currentTimeMillis();
- //logger.info("项目名匹配帖子标题: " + (end1 - start1) + "ms");
- // 项目名匹配帖子标签 TODO :词频统计可直接从索引获得
- if (prjName.length() >= 2 && !Normalizer.isAllNumber(prjName)) {
- Set tagStrings = resultPipeline.getTagbyPrjId(prjId); // 这里要返回标签匹配到的项目id;
- for (String tag : tagStrings) {
- matchMap = LuceneSearch.searchMemoTags(
- tag.toLowerCase(), LuceneIndex.memoTagsFieldName, memoTagsWeight,
- matchMap, memoIndexReader);
- }
- }
- //long end2 = System.currentTimeMillis();
- //logger.info("项目名匹配帖子标签: " + (end2 - end1) + "ms");
- // 用项目标签搜索帖子标题
- //HashMap tagsCount = null;
if (prjTags != null && prjTags.length() > 0 && matchMap.size() > 0) {
List prjTagsList = Normalizer.tagsSegmentation(prjTags);
+ // 用项目标签搜索帖子标题
for (String prjTag : prjTagsList) {
matchMap = LuceneSearch.searchByPrjTagInMemoTitle(
prjTag, LuceneIndex.titleFieldName, LuceneIndex.memoIdFieldName, prjTagsToMemoTitleWeight,
matchMap, memoIndexReader);
}
- /*tagsCount = LuceneSearch.tagsMatch(
- LuceneIndex.memoIdFieldName, prjTags,
- LuceneIndex.titleFieldName, indexReader);
- // tagsCount是,指每个post的标题匹配到的tag数量
- if (tagsCount.size() > 0) {
- for (Map.Entry t : tagsCount.entrySet()) {
- int postId = t.getKey();
- int x = t.getValue();
- if (matchMap.containsKey(postId)) {
- matchMap.put(postId, matchMap.get(postId) + matchMap.get(postId) * 0.5 * (Math.log(x * x + 1) / Math.log(2)));
- }
- }
- }*/
- //long end3 = System.currentTimeMillis();
- //logger.info("用项目标签搜索帖子标题: " + (end3 - end2) + "ms");
// 用项目标签搜索帖子标签
for (String prjTag : prjTagsList) {
matchMap = LuceneSearch.searchByPrjTag(
prjTag, LuceneIndex.memoTagsFieldName, LuceneIndex.memoIdFieldName, prjTagsToMemoTagsWeight,
matchMap, memoIndexReader);
}
- /*tagsCount = LuceneSearch.tagsMatch(
- LuceneIndex.memoIdFieldName, prjTags,
- LuceneIndex.memoTagsFieldName, indexReader);
- if (tagsCount.size() > 0) {
- for (Map.Entry t : tagsCount
- .entrySet()) {
- int postId = t.getKey();
- int y = t.getValue();
- if (matchMap.containsKey(postId)) {
- matchMap.put(postId, matchMap.get(postId) + matchMap.get(postId) * 0.6 * (Math.log(y * y + 1) / Math.log(2)));
- }
- }
- }*/
- //long end4 = System.currentTimeMillis();
- //logger.info("用项目标签搜索帖子标签: " + (end4 - end3) + "ms");
}
- //long end5 = System.currentTimeMillis();
if (matchMap.size() > 0)
insertPrjToMemoMatchResult(prjId, matchMap);
matchMap.clear();
- //long end6 = System.currentTimeMillis();
- //logger.info("当前项目匹配结果入库: " + (end6 - end5) + "ms");
projectDao.updateProcessedPrj(prjId);
logger.info("current prjId: " + prjId);
}
memoIndexReader.close();
memoDire.close();
- prjIndexReader.close();
- prjDire.close();
} catch (IOException e) {
logger.error("prjToMemoMatch io error in Match: " + e);
}
@@ -263,13 +210,13 @@ public class Match {
public void memoToPrjMatch(List memos) {
HashMap matchMap = new HashMap();
try {
- Directory dire = FSDirectory.open(Paths
+ Directory prjDire = FSDirectory.open(Paths
.get(LuceneIndex.PRJS_INDEX_PATH));
- if (!DirectoryReader.indexExists(dire)) { // if the index directory
+ if (!DirectoryReader.indexExists(prjDire)) { // if the index directory
// doesn't exist
return;
}
- IndexReader indexReader = DirectoryReader.open(dire);
+ IndexReader prjIndexReader = DirectoryReader.open(prjDire);
for(RelativeMemo memo : memos) {
int memoId = 0;
String memoTitle = "";
@@ -288,88 +235,48 @@ public class Match {
if (Normalizer.isAllNumber(memoTitle)) {
continue;
}
- //long start1 = System.currentTimeMillis();
List memoTitleList = Normalizer.getList(memoTitle);
if (memoTitleList.size() > 0) {
// 帖子标题匹配项目名
matchMap = LuceneSearch.memoToPrjMatchByLucene(memoTitle,
memoTitleList, LuceneIndex.prjNameFieldName,
- titleWeight, matchMap, indexReader);
+ titleWeight, matchMap, prjIndexReader);
//帖子标题搜索项目别名
matchMap = LuceneSearch.memoToPrjMatchByLucene(memoTitle,
memoTitleList, LuceneIndex.prjSynonymsFieldName,
- titleWeight, matchMap, indexReader);
+ titleWeight, matchMap, prjIndexReader);
}
- //long end1 = System.currentTimeMillis();
- //logger.info("帖子标题匹配项目名: " + (end1 - start1) + "ms");
+ List memoTagsList = Normalizer.tagsSegmentation(memo.getTags().toLowerCase());
// 用帖子标签搜索项目名
- if (memoTags.length() > 0) {
- Set projectIds = resultPipeline.getPrjIdForMemo(memoId); // 这里要返回标签匹配到的项目id;
- for (int pId : projectIds) {
- if (matchMap.containsKey(pId)){
- matchMap.put(pId, matchMap.get(pId) + memoTagsWeight);
- }
- else {
- matchMap.put(pId, memoTagsWeight);
- }
+ if (memoTags.length() > 0 && memoTagsList.size() > 0) {
+ //用帖子标签搜索项目名
+ for (String memoTag : memoTagsList) {
+ matchMap = LuceneSearch.searchMemoTags(memoTag,
+ LuceneIndex.prjNameFieldName, LuceneIndex.prjIdFieldName, memoTagsWeight, matchMap, prjIndexReader);
+ }
+ //用帖子标签搜索项目别名
+ for (String tag : memoTagsList) {
+ matchMap = LuceneSearch.searchMemoTags(tag,
+ LuceneIndex.prjSynonymsFieldName, LuceneIndex.prjIdFieldName, memoTagsWeight, matchMap, prjIndexReader);
}
}
- //long end2 = System.currentTimeMillis();
- //logger.info("帖子标签搜索项目名: " + (end2 - end1) + "ms");
if (matchMap.size() > 0) {
// 帖子标题匹配项目标签
if (memoTitleList != null && memoTitleList.size() > 0) {
for (String memoTitleTerm : memoTitleList) {
matchMap = LuceneSearch.searchByPrjTag(
memoTitleTerm, LuceneIndex.prjTagsFieldName, LuceneIndex.prjIdFieldName, prjTagsToMemoTitleWeight,
- matchMap, indexReader);
+ matchMap, prjIndexReader);
}
}
- //long end3 = System.currentTimeMillis();
- //logger.info("帖子标题匹配项目标签: " + (end3 - end2) + "ms");
- /*HashMap tagsCount = null;
- tagsCount = LuceneSearch.tagsMatch(
- LuceneIndex.prjIdFieldName, memoTitle,
- LuceneIndex.prjTagsFieldName, indexReader);
- // tagsCount是,x指匹配到prjTag的数量
- if (tagsCount.size() > 0) {
- for (Map.Entry t : tagsCount
- .entrySet()) {
- int prjId = t.getKey();
- int x = t.getValue();
- if (matchMap.containsKey(prjId)) {
- matchMap.put(prjId, matchMap.get(prjId) + matchMap.get(prjId) * 0.5
- * (Math.log(x * x + 1) / Math.log(2)));
- }
- }
- } */
-
// 用帖子标签搜索项目标签
if (memo.getTags() != null && memo.getTags().length() > 0) {
- List memoTagsList = Normalizer.tagsSegmentation(memo.getTags().toLowerCase());
for (String memoTag : memoTagsList) {
matchMap = LuceneSearch.searchByPrjTag(
memoTag, LuceneIndex.prjTagsFieldName, LuceneIndex.prjIdFieldName, prjTagsToMemoTagsWeight,
- matchMap, indexReader);
+ matchMap, prjIndexReader);
}
}
- //long end4 = System.currentTimeMillis();
- //logger.info("帖子标签搜索项目标签: " + (end4 - end3) + "ms");
- /*if (memoTags.length() > 0) {
- tagsCount = LuceneSearch.tagsMatch(
- LuceneIndex.prjIdFieldName, memoTags,
- LuceneIndex.prjTagsFieldName, indexReader);
- if (tagsCount.size() > 0) {
- for (Map.Entry t : tagsCount.entrySet()) {
- int prjId = t.getKey();
- int y = t.getValue();
- if (matchMap.containsKey(prjId)) {
- matchMap.put(prjId, matchMap.get(prjId) + matchMap.get(prjId) * 0.6
- * (Math.log(y * y + 1) / Math.log(2)));
- }
- }
- }
- }*/
if (matchMap.size() > 0)
insertMemoToPrjMatchResult(memoId, matchMap);
}
@@ -377,8 +284,8 @@ public class Match {
matchMap.clear();
logger.info("current MemoId: " + memoId);
}
- indexReader.close();
- dire.close();
+ prjIndexReader.close();
+ prjDire.close();
} catch (Exception e) {
logger.error("memoToPrjMatch error in Match: " + e);
}
diff --git a/match_program/src/main/java/com/ossean/match/matchprocess/MatchIncrement.java b/match_program/src/main/java/com/ossean/match/matchprocess/MatchIncrement.java
index f4b3a6b..c12e897 100644
--- a/match_program/src/main/java/com/ossean/match/matchprocess/MatchIncrement.java
+++ b/match_program/src/main/java/com/ossean/match/matchprocess/MatchIncrement.java
@@ -11,21 +11,11 @@ import org.slf4j.LoggerFactory;
public class MatchIncrement extends Thread implements Runnable {
private Logger logger = LoggerFactory.getLogger(getClass());
- private int prjHistory; // the record of project after matching last time
private int memoHistory; // the record of memo after matching last time
- private int prjIndexed; //the record of prj indexed to
- private int memoIndexed; //the record of memo indexed to
private static File record; // record file "record.txt"
private boolean newPrjComing; //新项目进入标识
private boolean newMemoComing; //新帖子进入标识
private boolean currentMatchDone = true; //当前批量匹配是否结束
- private int lastPrjId = 0; //最大项目id
- private int lastTagId = 0; //最大标签id
- private int tagHistory = 0; //标签匹配记录
- private boolean newTagflag; //新标签进入标识
- private boolean newPrePrjFlag; //新的项目需要做标签匹配
- private int prePrjHistory = 0; //项目与标签匹配记录
- private int taskToPrjId = 0;
private int sleepTime = 0;
public boolean isNewPrjComing() {
@@ -52,91 +42,11 @@ public class MatchIncrement extends Thread implements Runnable {
this.sleepTime = sleepTime;
}
- public int getTaskToPrjId() {
- return taskToPrjId;
- }
-
- public void setTaskToPrjId(int taskToPrjId) {
- this.taskToPrjId = taskToPrjId;
- }
-
- public int getMemoIndexed() {
- return memoIndexed;
- }
-
- public void setMemoIndexed(int memoIndexed) {
- this.memoIndexed = memoIndexed;
- }
-
- public int getPrjIndexed() {
- return prjIndexed;
- }
-
- public void setPrjIndexed(int prjIndexed) {
- this.prjIndexed = prjIndexed;
- }
-
- public boolean isNewPrePrjFlag() {
- return newPrePrjFlag;
- }
-
- public void setNewPrePrjFlag(boolean newPrePrjFlag) {
- this.newPrePrjFlag = newPrePrjFlag;
- }
-
- public int getPrePrjHistory() {
- return prePrjHistory;
- }
-
- public void setPrePrjHistory(int prePrjHistory) {
- this.prePrjHistory = prePrjHistory;
- }
-
- public int getTagHistory() {
- return tagHistory;
- }
-
- public void setTagHistory(int tagHistory) {
- this.tagHistory = tagHistory;
- }
-
- public boolean isNewTagflag() {
- return newTagflag;
- }
-
- public void setNewTagflag(boolean newTagflag) {
- this.newTagflag = newTagflag;
- }
-
- public void setLastPrjId(int id) {
- this.lastPrjId = id;
- }
-
- public int getLastPrjId() {
- return lastPrjId;
- }
-
- public int getLastTagId() {
- return lastTagId;
- }
-
- public void setLastTagId(int id) {
- this.lastTagId = id;
- }
-
public MatchIncrement() {
record = new File("record.txt");
getMatchRecord();
}
- public int getPrjHistory() {
- return prjHistory;
- }
-
- public synchronized void setPrjHistory(int prjHistory) {
- this.prjHistory = prjHistory;
- }
-
public int getMemoHistory() {
return memoHistory;
}
@@ -160,13 +70,7 @@ public class MatchIncrement extends Thread implements Runnable {
public void getMatchRecord() {
try {
Scanner in = new Scanner(record);
- this.setPrjHistory(in.nextInt());
this.setMemoHistory(in.nextInt());
- this.setTagHistory(in.nextInt());
- this.setPrePrjHistory(in.nextInt());
- this.setPrjIndexed(in.nextInt());
- this.setMemoIndexed(in.nextInt());
- this.setTaskToPrjId(in.nextInt());
in.close();
} catch (FileNotFoundException e) {
logger.error("getMatchRecord FileNotFoundException: " + e);
@@ -179,8 +83,7 @@ public class MatchIncrement extends Thread implements Runnable {
public void writeIntoRecord() {
try {
FileOutputStream fout = new FileOutputStream(record);
- fout.write((prjHistory + "\t" + memoHistory + "\t" + tagHistory
- + "\t" + prePrjHistory + "\t" + prjIndexed + "\t" + memoIndexed + "\t" + taskToPrjId).getBytes());
+ fout.write((memoHistory + "").getBytes());
fout.close();
} catch (FileNotFoundException e) {
logger.error("writeIntoRecord FileNotFoundException: " + e);
diff --git a/match_program/src/main/java/com/ossean/match/matchprocess/MemoToPrjMatch.java b/match_program/src/main/java/com/ossean/match/matchprocess/MemoToPrjMatch.java
index f401aea..0fb8c70 100644
--- a/match_program/src/main/java/com/ossean/match/matchprocess/MemoToPrjMatch.java
+++ b/match_program/src/main/java/com/ossean/match/matchprocess/MemoToPrjMatch.java
@@ -2,7 +2,6 @@ package com.ossean.match.matchprocess;
import java.io.IOException;
import java.sql.SQLException;
-import java.text.DecimalFormat;
import java.util.List;
import javax.annotation.Resource;
@@ -41,7 +40,6 @@ public class MemoToPrjMatch implements Runnable {
public void memoToPrjIncrement() throws SQLException,
IOException {
matchIncrement.setCurrentMatchDone(false);
- long start = System.currentTimeMillis();
matchIncrement.getMatchRecord();
int startId = matchIncrement.getMemoHistory();
int endId = startId + step;
@@ -49,25 +47,15 @@ public class MemoToPrjMatch implements Runnable {
if ((lastMemoId - endId) <= step) {
endId = lastMemoId;
}
+ LuceneIndex luceneIndex = new LuceneIndex();
while (startId < lastMemoId) {
- long batchStart = System.currentTimeMillis();
List memos = memoDao.getMemoInfo(startId, endId);
if (memos != null && memos.size() > 0) {
match.memoToPrjMatch(memos);
matchIncrement.setMemoHistory(endId);
- if (startId > matchIncrement.getMemoIndexed()) {
- LuceneIndex luceneIndex = new LuceneIndex();
- int memoIndexed = luceneIndex.memoIndexIncrement(memos);
- // adding memos index and save the record
- matchIncrement.setMemoIndexed(memoIndexed);
- }
+ // adding memos index and save the record
+ luceneIndex.memoIndexIncrement(memos);
}
- long batchEnd = System.currentTimeMillis();
- DecimalFormat df = new DecimalFormat( "0.00");
- double totalTime = (double)(batchEnd - batchStart)/60000;
- double rate = (double)(endId - startId)/((batchEnd - batchStart)/1000);
- logger.info("current Batch memo-->prj total time: " + df.format(totalTime) + "min; "
- + " average: " + df.format(rate) + " 条/s");
logger.info("relative_memos_to_osps matched to : "
+ matchIncrement.getMemoHistory());
logger.info("relative_memos_to_osps remain_to_match : "
@@ -83,13 +71,8 @@ public class MemoToPrjMatch implements Runnable {
endId += step;
}
}
- long end = System.currentTimeMillis();
- DecimalFormat df = new DecimalFormat( "0.00");
- double totalTime = (double)(end - start)/60000;
- logger.info("memo-->prj total time: " + df.format(totalTime) + "min");
matchIncrement.setCurrentMatchDone(true);
}
-
public void run() {
if (matchIncrement.isNewMemoComing()) {
logger.info("start relative_memos_to_osps match!!!");
diff --git a/match_program/src/main/java/com/ossean/match/matchprocess/NewPrjMonitor.java b/match_program/src/main/java/com/ossean/match/matchprocess/NewPrjMonitor.java
index 6bb1f80..fa93912 100644
--- a/match_program/src/main/java/com/ossean/match/matchprocess/NewPrjMonitor.java
+++ b/match_program/src/main/java/com/ossean/match/matchprocess/NewPrjMonitor.java
@@ -26,13 +26,9 @@ public class NewPrjMonitor {
public void run() {
int newPrjCount = projectDao.getNewPrjCount();
- int lastPrjId = projectDao.getNewLast();
int lastMemoId = memoDao.getLastMemoId();
- int lastTagId = tagDao.getNewLast();
if (newPrjCount > 0) {
matchIncrement.setNewPrjComing(true);
- matchIncrement.setLastPrjId(lastPrjId);
-
} else {
matchIncrement.setNewPrjComing(false);
}
@@ -42,19 +38,6 @@ public class NewPrjMonitor {
matchIncrement.setNewMemoComing(false);
}
- if (newPrjCount > 0) {
- matchIncrement.setNewPrePrjFlag(true);
- } else {
- matchIncrement.setNewPrePrjFlag(false);
- }
-
- if (lastTagId > matchIncrement.getTagHistory()) {
- matchIncrement.setNewTagflag(true);
- matchIncrement.setLastTagId(lastTagId);
-
- } else {
- matchIncrement.setNewTagflag(false);
- }
if (!matchIncrement.isNewMemoComing()&&!matchIncrement.isNewPrjComing()) {
matchIncrement.setSleepTime(30000);
}else {
diff --git a/match_program/src/main/java/com/ossean/match/matchprocess/PrjToMemoMatch.java b/match_program/src/main/java/com/ossean/match/matchprocess/PrjToMemoMatch.java
index 9249750..faae10e 100644
--- a/match_program/src/main/java/com/ossean/match/matchprocess/PrjToMemoMatch.java
+++ b/match_program/src/main/java/com/ossean/match/matchprocess/PrjToMemoMatch.java
@@ -2,7 +2,6 @@ package com.ossean.match.matchprocess;
import java.io.IOException;
import java.sql.SQLException;
-import java.text.DecimalFormat;
import java.util.List;
import javax.annotation.Resource;
@@ -34,73 +33,31 @@ public class PrjToMemoMatch implements Runnable {
public void prjToMemoIncrement() throws IOException, SQLException {
matchIncrement.setCurrentMatchDone(false);
- long start = System.currentTimeMillis();
matchIncrement.getMatchRecord();
- List newPrjs = projectDao.getBatchPrjsIncre(step);// 批量读取filtration为1或者filtration为2且update_mark不为空的项目。即未匹配的和已匹配但有更新的。
- if(newPrjs == null || newPrjs.size() == 0){
- matchIncrement.setPrjHistory(matchIncrement.getLastPrjId());
- matchIncrement.writeIntoRecord();
- }
+ // 批量读取filtration为1或者filtration为2且update_mark不为空的项目。即未匹配的和已匹配但有更新的。
+ List newPrjs = projectDao.getBatchPrjsIncre(step);
while (newPrjs != null && newPrjs.size() > 0) {
- long batchStart = System.currentTimeMillis();
- long end1 = System.currentTimeMillis();
- logger.info("批量读项目: " + (end1 - batchStart) + "ms");
if (newPrjs != null) {
match.prjToMemoMatch(newPrjs);
- matchIncrement.setPrjHistory(newPrjs.get(newPrjs.size()-1).getId());
- // adding projects index and save the record
+ // adding projects index
LuceneIndex luceneIndex = new LuceneIndex();
- int prjIndexed = luceneIndex.prjIndexIncrement(newPrjs);
- matchIncrement.setPrjIndexed(prjIndexed);
+ luceneIndex.prjIndexIncrement(newPrjs);
}
- long batchEnd = System.currentTimeMillis();
- DecimalFormat df = new DecimalFormat( "0.00");
- double totalTime = (double)(batchEnd - batchStart)/60000;
- double rate = (double)(newPrjs.size())/((batchEnd - batchStart)/1000);
- logger.info("current Batch prj-->memo total time: " + df.format(totalTime) + "min; "
- + " average: " + df.format(rate) + " 条/s");
- // finishing matching, writing the record to "record.txt"
- matchIncrement.writeIntoRecord();
- newPrjs = projectDao.getBatchPrjsIncre(step);// getting projects' id and prjName in batches
+ // getting projects' id and prjName in batches
+ newPrjs = projectDao.getBatchPrjsIncre(step);
}
- long end = System.currentTimeMillis();
- DecimalFormat df = new DecimalFormat( "0.00");
- double totalTime = (double)(end - start)/60000;
- logger.info("prj-->memo total time: " + df.format(totalTime) + "min");
matchIncrement.setCurrentMatchDone(true);
}
public void run() {
- if (matchIncrement.getTaskToPrjId() != 0) {
- int startId = matchIncrement.getPrjHistory();
- int endId = matchIncrement.getTaskToPrjId();
- long batchStart = System.currentTimeMillis();
- List newPrjs = projectDao.getBatchPrjs(startId, endId);// getting projects' id and prjName in batches
- long end1 = System.currentTimeMillis();
- logger.info("批量读项目: " + (end1 - batchStart) + "ms");
- if (newPrjs != null) {
- match.prjToMemoMatch(newPrjs);
- }
- long batchEnd = System.currentTimeMillis();
- DecimalFormat df = new DecimalFormat( "0.00");
- double totalTime = (double)(batchEnd - batchStart)/60000;
- double rate = (double)(endId - startId)/((batchEnd - batchStart)/1000);
- logger.info("current Batch prj-->memo total time: " + df.format(totalTime) + "min; "
- + " average: " + df.format(rate) + " 条/s");
- logger.info("osps_to_relative_memos matched to : "
- + matchIncrement.getPrjHistory());
- System.exit(0);
- }
- else {
- if (matchIncrement.isNewPrjComing()) {
- logger.info("start osps_to_relative_memos match!!!");
- try {
- prjToMemoIncrement();
- } catch (IOException e) {
- logger.error("IOException: " + e);
- } catch (SQLException e) {
- logger.error("SQLException: " + e);
- }
+ if (matchIncrement.isNewPrjComing()) {
+ logger.info("start osps_to_relative_memos match!!!");
+ try {
+ prjToMemoIncrement();
+ } catch (IOException e) {
+ logger.error("IOException: " + e);
+ } catch (SQLException e) {
+ logger.error("SQLException: " + e);
}
}
diff --git a/match_program/src/main/java/com/ossean/match/model/Taggings.java b/match_program/src/main/java/com/ossean/match/model/Taggings.java
deleted file mode 100644
index 583ed1d..0000000
--- a/match_program/src/main/java/com/ossean/match/model/Taggings.java
+++ /dev/null
@@ -1,68 +0,0 @@
-package com.ossean.match.model;
-
-public class Taggings {
- private int id;
- private int tag_id;
- private int taggable_id;
- private String taggable_type;
- private int tagger_id;
- private String tagger_type;
- private String context;
- private String created_at;
- private int disagree_num;
- public int getId() {
- return id;
- }
- public void setId(int id) {
- this.id = id;
- }
- public int getTag_id() {
- return tag_id;
- }
- public void setTag_id(int tag_id) {
- this.tag_id = tag_id;
- }
- public int getTaggable_id() {
- return taggable_id;
- }
- public void setTaggable_id(int taggable_id) {
- this.taggable_id = taggable_id;
- }
- public String getTaggable_type() {
- return taggable_type;
- }
- public void setTaggable_type(String taggable_type) {
- this.taggable_type = taggable_type;
- }
- public int getTagger_id() {
- return tagger_id;
- }
- public void setTagger_id(int tagger_id) {
- this.tagger_id = tagger_id;
- }
- public String getTagger_type() {
- return tagger_type;
- }
- public void setTagger_type(String tagger_type) {
- this.tagger_type = tagger_type;
- }
- public String getContext() {
- return context;
- }
- public void setContext(String context) {
- this.context = context;
- }
- public String getCreated_at() {
- return created_at;
- }
- public void setCreated_at(String created_at) {
- this.created_at = created_at;
- }
- public int getDisagree_num() {
- return disagree_num;
- }
- public void setDisagree_num(int disagree_num) {
- this.disagree_num = disagree_num;
- }
-
-}
diff --git a/match_program/src/main/java/com/ossean/match/pipeline/AtomicItemPipeline.java b/match_program/src/main/java/com/ossean/match/pipeline/AtomicItemPipeline.java
deleted file mode 100644
index b17cddf..0000000
--- a/match_program/src/main/java/com/ossean/match/pipeline/AtomicItemPipeline.java
+++ /dev/null
@@ -1,57 +0,0 @@
-package com.ossean.match.pipeline;
-
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-
-import javax.annotation.Resource;
-
-import org.springframework.stereotype.Component;
-import org.springframework.transaction.annotation.Transactional;
-
-import com.ossean.match.dao.AtomicItemDao;
-import com.ossean.match.model.Atom;
-
-@Component
-public class AtomicItemPipeline {
-
- @Resource
- private AtomicItemDao atomicItemDao;
-
- // 把原子性的
- @Transactional
- public void pipelineAtoms(Map> atoms) {
- // TODO Auto-generated method stub
- Set atomNames = atoms.keySet();
- ArrayList nums;
- for (String name : atomNames) {
- nums = atoms.get(name);
- atomicItemDao.insertAtomicItem(name, nums.get(0), nums.get(1));
- }
- }
-
- @Transactional
- // 0: 未更新
- // 1: 数据更新
- // 2: 数据插入
- public void pipelineAtoms(List atoms) {
- // TODO Auto-generated method stub
- for (Atom atom : atoms) {
- int status = atom.getStatus();
- switch (status) {
- case 0:
- break;
- case 1:
- atomicItemDao.updateAtomicItem(atom.getName(),
- atom.getNumTag(), atom.getNumInProj());
- break;
- default:
- atomicItemDao.insertAtomicItem(atom.getName(),
- atom.getNumTag(), atom.getNumInProj());
- }
-
- }
- }
-
-}
diff --git a/match_program/src/main/java/com/ossean/match/pipeline/ResultPipeline.java b/match_program/src/main/java/com/ossean/match/pipeline/ResultPipeline.java
deleted file mode 100644
index e1bd171..0000000
--- a/match_program/src/main/java/com/ossean/match/pipeline/ResultPipeline.java
+++ /dev/null
@@ -1,163 +0,0 @@
-package com.ossean.match.pipeline;
-
-import java.util.HashSet;
-import java.util.LinkedList;
-import java.util.List;
-import java.util.Set;
-
-import javax.annotation.Resource;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import org.springframework.stereotype.Component;
-import org.springframework.transaction.annotation.Transactional;
-
-import com.ossean.match.dao.ResultDao;
-import com.ossean.match.dao.TaggingDao;
-import com.ossean.match.model.Project;
-
-@Component
-public class ResultPipeline {
- @Resource
- private ResultDao resultDao;
- @Resource
- private TaggingDao taggingDao;
-
- private Logger logger = LoggerFactory.getLogger(getClass());
-
- private LinkedList- items1 = new LinkedList
- ();
- private LinkedList
- items2 = new LinkedList
- ();
- private LinkedList
- items3 = new LinkedList
- ();
- private LinkedList projects = new LinkedList();
-
- @Transactional
- public void insertResult(int prjId, String string, int tagId,
- String tagOriginalName, float score) {
-
- }
-
- @Transactional
- public Set getPrjIdForMemo(int memoId) {
- List tagIds = taggingDao.getTagIdForMemo(memoId);
- Set result = new HashSet();
- for (int tagId : tagIds) {
- List tempIntegers = resultDao.getMatchPrjId(tagId);
- result.addAll(tempIntegers);
- }
- return result;
- }
-
- public Set getTagbyPrjId(int prjId){
- List tags = resultDao.getMatchTag(prjId);
- Set result = new HashSet();
- for (String tag : tags) {
-// List tempIntegers = resultDao.getMatchPrjId(tagId);
- result.add(tag);
- }
- return result;
- }
-
- @Transactional
- public void updateRank(Project project, int flag) {
- if (flag == 1) {
- projects.add(project);
- if (projects.size() >= 10000) {
- for (Project tproject : projects) {
- System.out.println("handle project:" + tproject.getId());
- resultDao.updateRank(tproject.getRank(),
- tproject.getStar(), tproject.getId());
- }
- projects.clear();
- }
- } else {
- for (Project tproject : projects) {
- System.out.println("handle project:" + tproject.getId());
- resultDao.updateRank(tproject.getRank(), tproject.getStar(),
- tproject.getId());
- }
- projects.clear();
- }
- }
-
- @Transactional
- public void insertResult1(int prjId, String prjName, int tagId,
- String tagName, float score) {
- items1.add(new Item(prjId, prjName, tagId, tagName, score));
- // 10000条输入数据库一次
- if (items1.size() >= 10000) {
- for (Item item : items1) {
- resultDao.insertResult1(item.getPrjId(), item.getPrjName(),
- item.getTagId(), item.getTagName(), item.getScore());
- }
- items1.clear();
- }
- }
-
- @Transactional
- public void insertResult2(int prjId, String prjName, int tagId,
- String tagName, float score) {
- items2.add(new Item(prjId, prjName, tagId, tagName, score));
- // 10000条输入数据库一次
- if (items2.size() >= 10000) {
- for (Item item : items2) {
- resultDao.insertResult2(item.getPrjId(), item.getPrjName(),
- item.getTagId(), item.getTagName(), item.getScore());
- }
- items2.clear();
- }
- }
-
- @Transactional
- public void insertResult3(int prjId, String prjName, int tagId,
- String tagName, float score) {
- items3.add(new Item(prjId, prjName, tagId, tagName, score));
- // 10000条输入数据库一次
- if (items3.size() >= 10000) {
- logger.info("match tag and project :" + prjId);
- for (Item item : items3) {
- resultDao.insertResult3(item.getPrjId(), item.getPrjName(),
- item.getTagId(), item.getTagName(), item.getScore());
- }
- items3.clear();
- }
- }
-}
-
-class Item {
-
- public Item(int prjId, String prjName, int tagId, String tagName,
- float score) {
- super();
- this.tagName = tagName;
- this.prjId = prjId;
- this.prjName = prjName;
- this.tagId = tagId;
- this.score = score;
- }
-
- public int getPrjId() {
- return prjId;
- }
-
- public String getPrjName() {
- return prjName;
- }
-
- public int getTagId() {
- return tagId;
- }
-
- public float getScore() {
- return score;
- }
-
- public String getTagName() {
- return tagName;
- }
-
- int prjId;
- String prjName;
- int tagId;
- String tagName;
- float score;
-}
diff --git a/match_program/src/main/resources/applicationContext_mybatis.xml b/match_program/src/main/resources/applicationContext_mybatis.xml
index bfc2d83..f6deb8f 100644
--- a/match_program/src/main/resources/applicationContext_mybatis.xml
+++ b/match_program/src/main/resources/applicationContext_mybatis.xml
@@ -38,7 +38,7 @@
-
+
diff --git a/project_manager/bin/hotwords.sh b/project_manager/bin/hotwords.sh
index 2c3acfe..3b26e22 100644
--- a/project_manager/bin/hotwords.sh
+++ b/project_manager/bin/hotwords.sh
@@ -16,4 +16,4 @@ JVM_ARGS="-Xmn98m -Xmx512m -Xms512m -XX:NewRatio=4 -XX:SurvivorRatio=4 -XX:MaxTe
#echo JVM_ARGS=$JVM_ARGS
#ulimit -n 400000
#echo "" > nohup.out
-java $JVM_ARGS -classpath $CLASSPATH com.ossean.projectmanager.HotwordsMain >>log/hotwords.log 2>&1 &
\ No newline at end of file
+java $JVM_ARGS -classpath $CLASSPATH com.ossean.projectmanager.HotwordsMain > log/hotwords.log 2>&1 &
\ No newline at end of file
diff --git a/project_manager/bin/projectsfilter.sh b/project_manager/bin/projectsfilter.sh
index 6a7900b..eb7daaa 100644
--- a/project_manager/bin/projectsfilter.sh
+++ b/project_manager/bin/projectsfilter.sh
@@ -17,4 +17,4 @@ JVM_ARGS="-Xmn98m -Xmx1024m -Xms512m -XX:NewRatio=4 -XX:SurvivorRatio=4 -XX:MaxT
#echo JVM_ARGS=$JVM_ARGS
#ulimit -n 400000
#echo "" > nohup.out
-java $JVM_ARGS -classpath $CLASSPATH com.ossean.projectmanager.ProjectsFilterMain >>log/projectsfilter.log 2>&1 &
\ No newline at end of file
+java $JVM_ARGS -classpath $CLASSPATH com.ossean.projectmanager.ProjectsFilterMain > log/projectsfilter.log 2>&1 &
\ No newline at end of file
diff --git a/project_manager/bin/resources/applicationContext_mybatis.xml b/project_manager/bin/resources/applicationContext_mybatis.xml
index 7935118..6c5596e 100644
--- a/project_manager/bin/resources/applicationContext_mybatis.xml
+++ b/project_manager/bin/resources/applicationContext_mybatis.xml
@@ -39,9 +39,9 @@
destroy-method="close">
+ value="jdbc:mysql://172.16.128.36:3306/ossean_production?characterEncoding=UTF-8&zeroDateTimeBehavior=convertToNull&autoReconnect=true" />
-
+
diff --git a/project_manager/bin/syncmatchresult.sh b/project_manager/bin/syncmatchresult.sh
index fc917d6..c41c0f3 100644
--- a/project_manager/bin/syncmatchresult.sh
+++ b/project_manager/bin/syncmatchresult.sh
@@ -17,4 +17,4 @@ JVM_ARGS="-Xmn98m -Xmx1024m -Xms512m -XX:NewRatio=4 -XX:SurvivorRatio=4 -XX:MaxT
#echo JVM_ARGS=$JVM_ARGS
#ulimit -n 400000
#echo "" > nohup.out
-java $JVM_ARGS -classpath $CLASSPATH com.ossean.projectmanager.SyncMatchResultMain >>log/syncmatchresult.log 2>&1 &
\ No newline at end of file
+java $JVM_ARGS -classpath $CLASSPATH com.ossean.projectmanager.SyncMatchResultMain > log/syncmatchresult.log 2>&1 &
\ No newline at end of file
diff --git a/project_manager/bin/updateosptags.sh b/project_manager/bin/updateosptags.sh
index fdde324..2289141 100644
--- a/project_manager/bin/updateosptags.sh
+++ b/project_manager/bin/updateosptags.sh
@@ -17,4 +17,4 @@ JVM_ARGS="-Xmn98m -Xmx512m -Xms512m -XX:NewRatio=4 -XX:SurvivorRatio=4 -XX:MaxTe
#echo JVM_ARGS=$JVM_ARGS
#ulimit -n 400000
#echo "" > nohup.out
-java $JVM_ARGS -classpath $CLASSPATH com.ossean.projectmanager.UpdateOspTagsMain >>log/updateosptags.log 2>&1 &
\ No newline at end of file
+java $JVM_ARGS -classpath $CLASSPATH com.ossean.projectmanager.UpdateOspTagsMain > log/updateosptags.log 2>&1 &
\ No newline at end of file
diff --git a/project_manager/src/main/java/com/ossean/projectmanager/UpdateOspTagsMain.java b/project_manager/src/main/java/com/ossean/projectmanager/UpdateOspTagsMain.java
index 9eadf04..2551d6a 100644
--- a/project_manager/src/main/java/com/ossean/projectmanager/UpdateOspTagsMain.java
+++ b/project_manager/src/main/java/com/ossean/projectmanager/UpdateOspTagsMain.java
@@ -16,7 +16,8 @@ public class UpdateOspTagsMain {
private UpdateOspTags updateTags;
public void start(){
- updateTags.updateOspTags();
+// updateTags.updateOspTags();
+ updateTags.restorePrjTaggings();
}
public static void main(String[] args){
diff --git a/project_manager/src/main/java/com/ossean/projectmanager/hotwords/UpdateOspTags.java b/project_manager/src/main/java/com/ossean/projectmanager/hotwords/UpdateOspTags.java
index 8382421..c6d38ef 100644
--- a/project_manager/src/main/java/com/ossean/projectmanager/hotwords/UpdateOspTags.java
+++ b/project_manager/src/main/java/com/ossean/projectmanager/hotwords/UpdateOspTags.java
@@ -13,6 +13,7 @@ import com.ossean.projectmanager.lasttabledao.TagDao;
import com.ossean.projectmanager.lasttabledao.TaggingsDao;
import com.ossean.projectmanager.model.OpenSourceProject;
import com.ossean.projectmanager.model.Taggings;
+import com.ossean.projectmanager.utils.DataHandler;
/**
*
@@ -28,6 +29,9 @@ public class UpdateOspTags {
private TagDao tagDao;
@Resource
private TaggingsDao taggingsDao;
+
+ private String taggingsTableName = "taggings";
+ private String tagsTableName = "tags";
/**
* 根据taggings更新项目标签字段tags和权重更高的标签字段tags_for_search
@@ -67,5 +71,50 @@ public class UpdateOspTags {
}
}
+
+    /**
+     * Rebuilds tag and tagging rows for projects from each project's own tags string.
+     * Walks projects in batches of 5000 while the batch start is below 2000000.
+     */
+    public void restorePrjTaggings() {
+        logger.info("start restorePrjTaggings ......");
+        int start = 0;
+        int prjId = 0;
+        while (start < 2000000) {
+            List<OpenSourceProject> ospList = ospDao.getProjectsByBatch(start, 5000);
+            if (ospList == null || ospList.isEmpty()) {
+                // prjId is unchanged here, so "prjId + 5000" would re-query the same
+                // empty window forever; step past it instead.
+                start += 5000;
+                continue;
+            }
+            for (OpenSourceProject osp : ospList) {
+                prjId = osp.getId();
+                String tags = osp.getTags();
+                if (tags == null) {
+                    continue; // this project has no tags
+                }
+                for (String tag : DataHandler.tagsSegmentation(tags)) {
+                    // "insert ignore" statement, then read back the id of that tag name
+                    ospDao.insertTag(tagsTableName, tag);
+                    Taggings taggings = new Taggings();
+                    taggings.setTag_id(ospDao.selectTagIdByName(tagsTableName, tag));
+                    taggings.setTaggable_id(prjId);
+                    taggings.setTaggable_type("OpenSourceProject");
+                    taggings.setContext("tags");
+                    taggings.setCreated_at(DataHandler.getNow());
+                    try {
+                        ospDao.insertTaggings(taggingsTableName, taggings);
+                    } catch (Exception e) {
+                        logger.error("insert taggins error: " + e);
+                        System.exit(1); // abort the run and report failure to the caller
+                    }
+                }
+                logger.info("currentPrjId: " + prjId);
+            }
+            logger.info("last prj batch end, currentPrjId: " + prjId);
+            start = prjId + 5000;
+        }
+    }
}
diff --git a/project_manager/src/main/java/com/ossean/projectmanager/lasttabledao/OpenSourceProjectDao.java b/project_manager/src/main/java/com/ossean/projectmanager/lasttabledao/OpenSourceProjectDao.java
index fd70bad..988c9af 100644
--- a/project_manager/src/main/java/com/ossean/projectmanager/lasttabledao/OpenSourceProjectDao.java
+++ b/project_manager/src/main/java/com/ossean/projectmanager/lasttabledao/OpenSourceProjectDao.java
@@ -2,15 +2,32 @@ package com.ossean.projectmanager.lasttabledao;
import java.util.List;
+import org.apache.ibatis.annotations.Insert;
import org.apache.ibatis.annotations.Param;
import org.apache.ibatis.annotations.Select;
import org.apache.ibatis.annotations.Update;
import com.ossean.projectmanager.model.OpenSourceProject;
+import com.ossean.projectmanager.model.Taggings;
public interface OpenSourceProjectDao {
-
- //获得最大项目id
+
+ // Stores a tag name in the given tag table. INSERT IGNORE makes MySQL skip a row that would violate a constraint instead of failing; ${table} is spliced into the SQL as raw text, so pass only trusted table names.
+ @Insert("insert ignore into ${table} (name) values (#{name})")
+ public void insertTag(@Param("table") String table,
+ @Param("name") String name);
+
+ // Looks up a tag's id by its name in the given table. NOTE(review): the return type is primitive int, so a name with no matching row presumably fails inside MyBatis rather than yielding null - confirm.
+ @Select("select id from ${table} where name=#{name}")
+ public int selectTagIdByName(@Param("table") String table,
+ @Param("name") String name);
+
+ // Inserts a Taggings object as one row of the given table, using INSERT IGNORE; ${table} is raw text substitution, so pass only trusted table names.
+ @Insert("insert ignore into ${table} (tag_id,taggable_id,taggable_type,context,created_at,disagree_num) values (#{model.tag_id},#{model.taggable_id},#{model.taggable_type},#{model.context},#{model.created_at},#{model.disagree_num})")
+ public void insertTaggings(@Param("table") String table,
+ @Param("model") Taggings model);
+
+ // 获得最大项目id
@Select("select MAX(id) from open_source_projects")
public Integer getNewLast();
@@ -19,10 +36,9 @@ public interface OpenSourceProjectDao {
public List getProjectsByBatch(
@Param("start") int start, @Param("size") int size);
- //更新项目标签字段tags和权重更高的标签字段tags_for_search
+ // 更新项目标签字段tags和权重更高的标签字段tags_for_search
@Update("update open_source_projects set tags=#{tags}, tags_for_search = #{tagsForSearch} where id=#{id}")
- public void updatePrjTags(@Param("id") int id,
- @Param("tags") String tags,
+ public void updatePrjTags(@Param("id") int id, @Param("tags") String tags,
@Param("tagsForSearch") String tagsForSearch);
// 对项目标签属性进行更新
diff --git a/project_manager/src/main/java/com/ossean/projectmanager/projectsfilter/ProjectsFilter.java b/project_manager/src/main/java/com/ossean/projectmanager/projectsfilter/ProjectsFilter.java
index 28e6c52..05df7cc 100644
--- a/project_manager/src/main/java/com/ossean/projectmanager/projectsfilter/ProjectsFilter.java
+++ b/project_manager/src/main/java/com/ossean/projectmanager/projectsfilter/ProjectsFilter.java
@@ -68,9 +68,9 @@ public class ProjectsFilter {
.getOpenHubPrjByUrl(url); // 根据url从openhub的项目分表获得项目信息
if (openhubProject != null
&& openhubProject.getName() != null
- && openhubProject.getName() != ""
+ && !"".equals(openhubProject.getName())
&& openhubProject.getDescription() != null
- && openhubProject.getDescription() != ""
+ && !"".equals(openhubProject.getDescription())
&& openhubProject.getCodeRepository() != null
&& !openhubProject.getCodeRepository().contains(
"add a code location")) { // openhub的筛选条件为name、description不为空,且该项目有版本库
@@ -80,10 +80,11 @@ public class ProjectsFilter {
matchResultDao.deleteMatchResult(
getTargetTable(project.getId()),
project.getId()); // 删除该项目的匹配结果,确保无之前的匹配结果
- } else {
- lastProjectDao.updateFiltratedPrj(project.getId(),
- 2); // 筛选标识由1或2变为2,表示该项目之前就是筛选作为保留的
- }
+ }
+// else {
+// lastProjectDao.updateFiltratedPrj(project.getId(),
+// 2); // 筛选标识由1或2变为2,表示该项目之前就是筛选作为保留的
+// }
} else {
lastProjectDao.updateFiltratedPrj(project.getId(), 0); // 筛选标识变为0,表示该项目不保留
matchResultDao.deleteMatchResult(
@@ -95,9 +96,9 @@ public class ProjectsFilter {
.getSourceForgePrjByUrl(url); // 根据url从SourceForge的项目分表获得项目信息
if (sourceforgeProject != null
&& sourceforgeProject.getName() != null
- && sourceforgeProject.getName() != ""
+ && !"".equals(sourceforgeProject.getName())
&& sourceforgeProject.getDescription() != null
- && sourceforgeProject.getDescription() != ""
+ && !"".equals(sourceforgeProject.getDescription())
&& ((sourceforgeProject.getDownload_num() > 0) || (sourceforgeProject
.getStars() > 0))) {
if (project.getFilration() == 0) {
@@ -106,10 +107,11 @@ public class ProjectsFilter {
matchResultDao.deleteMatchResult(
getTargetTable(project.getId()),
project.getId()); // 删除该项目的匹配结果,确保无之前的匹配结果
- } else {
- lastProjectDao.updateFiltratedPrj(project.getId(),
- 2); // 筛选标识由1或2变为2,表示该项目之前就是筛选作为保留的
- }
+ }
+// else {
+// lastProjectDao.updateFiltratedPrj(project.getId(),
+// 2); // 筛选标识由1或2变为2,表示该项目之前就是筛选作为保留的
+// }
} else {
lastProjectDao.updateFiltratedPrj(project.getId(), 0); // 筛选标识变为0,表示该项目不保留
matchResultDao.deleteMatchResult(
@@ -122,9 +124,10 @@ public class ProjectsFilter {
matchResultDao.deleteMatchResult(
getTargetTable(project.getId()),
project.getId()); // 删除该项目的匹配结果,确保无之前的匹配结果
- } else {
- lastProjectDao.updateFiltratedPrj(project.getId(), 2); // 筛选标识由1或2变为2,表示该项目之前就是筛选作为保留的
- }
+ }
+// else {
+// lastProjectDao.updateFiltratedPrj(project.getId(), 2); // 筛选标识由1或2变为2,表示该项目之前就是筛选作为保留的
+// }
} else {
logger.info("Unknown source... source = " + source);
}
diff --git a/project_manager/src/main/java/com/ossean/projectmanager/utils/DataHandler.java b/project_manager/src/main/java/com/ossean/projectmanager/utils/DataHandler.java
new file mode 100644
index 0000000..be040fc
--- /dev/null
+++ b/project_manager/src/main/java/com/ossean/projectmanager/utils/DataHandler.java
@@ -0,0 +1,38 @@
+package com.ossean.projectmanager.utils;
+
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.Date;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/** Static helpers for timestamps and for splitting "<a><b>"-style tag strings. */
+public class DataHandler {
+    // One angle-bracketed tag; compiled once here instead of on every call.
+    private static final Pattern TAG_PATTERN = Pattern.compile("<[^<>]*>");
+
+    /** Returns the current time formatted as "yyyy-MM-dd HH:mm:ss". */
+    public static String getNow() {
+        return new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(new Date());
+    }
+
+    /**
+     * Splits a tag string into its tags, e.g. "<java><web>" gives [java, web].
+     *
+     * @param tags tags wrapped in angle brackets; may be null
+     * @return the tags without brackets, in order of appearance; never null
+     */
+    public static List<String> tagsSegmentation(String tags) {
+        List<String> result = new ArrayList<String>();
+        if (tags == null) {
+            return result;
+        }
+        Matcher matcher = TAG_PATTERN.matcher(tags);
+        while (matcher.find()) {
+            String bracketed = matcher.group();
+            result.add(bracketed.substring(1, bracketed.length() - 1)); // drop '<' and '>'
+        }
+        return result;
+    }
+}
diff --git a/project_match/bin/resources/applicationContext_mybatis.xml b/project_match/bin/resources/applicationContext_mybatis.xml
index 6737896..c698761 100644
--- a/project_match/bin/resources/applicationContext_mybatis.xml
+++ b/project_match/bin/resources/applicationContext_mybatis.xml
@@ -19,9 +19,9 @@
destroy-method="close">
+ value="jdbc:mysql://localhost:3306/ossean_production?characterEncoding=UTF-8&zeroDateTimeBehavior=convertToNull&autoReconnect=true" />
-
+
diff --git a/project_match/pom.xml b/project_match/pom.xml
index 699080b..e9148f4 100644
--- a/project_match/pom.xml
+++ b/project_match/pom.xml
@@ -46,7 +46,7 @@
junit
junit
- 3.8.1
+ 4.10
test
diff --git a/project_match/src/main/java/com/ossean/ClearTable.java b/project_match/src/main/java/com/ossean/ClearTable.java
index 26785a9..b98ed69 100644
--- a/project_match/src/main/java/com/ossean/ClearTable.java
+++ b/project_match/src/main/java/com/ossean/ClearTable.java
@@ -16,15 +16,35 @@ public class ClearTable {
Logger logger = Logger.getLogger(ClearTable.class);
public void start(){
- truncateTable("edd_pointers");
+ truncateTable("synonyms");
truncateTable("edd_relations");
truncateTable("synonymmings");
truncateTable("open_source_projects");
- deleteTaggings4Project();
+ deleteItemInEddPointers("gather_projects", "edd_relations");
+ //deleteTaggings4Project();
logger.info("完成去重程序运行前数据表的清空和taggings表OpenSourceProject的删除");
}
-
+ //Delete the edd_pointers rows recorded for the given source/target table pair
+ public void deleteItemInEddPointers(String sourceTableName, String targetTableName){
+ String sql = "delete from edd_pointers where SourceTableName=? and TargetTableName=?";
+ Connection conn = getConnection();
+ if(conn == null){
+ logger.info("没有获取到Connection");
+ System.exit(-1);
+ }
+ try {
+ PreparedStatement ps = conn.prepareStatement(sql);
+ ps.setString(1, sourceTableName);
+ ps.setString(2, targetTableName);
+ ps.execute();
+ ps.close();
+ } catch (SQLException e) {
+ logger.error("delete from edd_pointers failed", e);
+ } finally {
+ try { conn.close(); } catch (SQLException ignored) { /* always release the connection, even after a failure */ }
+ }
+ }
public void truncateTable(String table){
String sql = "truncate table " + table;
Connection conn = getConnection();
@@ -65,7 +85,7 @@ public class ClearTable {
Connection conn = null;
try {
Class.forName("com.mysql.jdbc.Driver").newInstance();
- conn = DriverManager.getConnection("jdbc:mysql://172.16.128.30:3306/ossean", "root", "123456");
+ conn = DriverManager.getConnection("jdbc:mysql://localhost:3306/ossean_production", "root", "NUDTpdl@123");
} catch (InstantiationException e) {
e.printStackTrace();
} catch (IllegalAccessException e) {
diff --git a/project_match/src/main/java/com/ossean/MergeProjects.java b/project_match/src/main/java/com/ossean/MergeProjects.java
index bb19c09..a752b7d 100644
--- a/project_match/src/main/java/com/ossean/MergeProjects.java
+++ b/project_match/src/main/java/com/ossean/MergeProjects.java
@@ -1,98 +1,113 @@
-package com.ossean;
-
-import java.util.List;
-
-import javax.annotation.Resource;
-
-import org.apache.log4j.Logger;
-import org.springframework.beans.factory.annotation.Autowired;
-import org.springframework.beans.factory.annotation.Qualifier;
-import org.springframework.context.ApplicationContext;
-import org.springframework.context.support.ClassPathXmlApplicationContext;
-import org.springframework.stereotype.Component;
-
-import com.ossean.databaseSource.DBSource;
-import com.ossean.databaseSource.GatherDao;
-import com.ossean.databaseSource.UpdateControlProjectsDao;
-import com.ossean.model.GatherProjectsModel;
-import com.ossean.util.MergeProjectNew;
-import com.ossean.util.MergeProjectsUtil;
-
-@Component
-public class MergeProjects {
- Logger logger = Logger.getLogger(this.getClass());
- @Resource
- private DBSource dbSource;
- @Resource
- private GatherDao gatherDao;
- @Resource
- private UpdateControlProjectsDao updateControlDao;
-
- @Qualifier("mergeProjectNew")
- @Autowired
- private MergeProjectNew mergeProjectNew;
-
- private int startId1;
-
- private static String pointerTableName = "edd_pointers";
- private static String sourceTableName = "gather_projects";
- private static String targetTableName = "edd_relations";
-
- private static int batchSize = 500;
-
- //读指针
- public int readPointer(String table, String source, String target, int minId){
- int pointer = minId;
- try {
- pointer = dbSource.getPointer(table, source, target);
- } catch(Exception e) {
- logger.info("No such pointer! Create one");
- dbSource.insertPointer(table, source, target, pointer);
- }
- return pointer;
- }
-
- public void start(){
- int minId = 1;
- int maxId = dbSource.getMaxId(sourceTableName);
-
- while(true){
- startId1 = readPointer(pointerTableName, sourceTableName, targetTableName, minId);
- if(startId1 > maxId){
- logger.info("finish program! HAHAHA");
- System.exit(0);
- }
- List gpmList1 = gatherDao.selectGPMList(sourceTableName, startId1, batchSize, maxId);
- if(gpmList1.size() == 0){
- try {
- logger.info("no increase item to be handled! handle update items");
- List updateProjectList = gatherDao.selectUpdateProjects(sourceTableName);
- for(GatherProjectsModel model:updateProjectList){
- mergeProjectNew.handleNewProject(model, true);
- gatherDao.updateUpdateMark(sourceTableName, null, model.getId());
- }
- logger.info("all update items have been solved. Sleep 1h");
- Thread.sleep(3600*1000);
- continue;
- } catch (InterruptedException e) {
- e.printStackTrace();
- }
- }
- for(GatherProjectsModel model1:gpmList1){
- logger.info("handling project : " + model1.getId());
- mergeProjectNew.handleNewProject(model1, false);
-
- //如果是对更新表中的数据进行操作 则不更新指针
- dbSource.updatePointer(pointerTableName, sourceTableName, targetTableName, model1.getId() + 1);
- gatherDao.updateUpdateMark(sourceTableName, null, model1.getId()); //需要更新update_mark字段 因为按顺序取的时候可能取到1的
- }
- }
- }
-
-
- public static void main(String[] args){
- ApplicationContext applicationContext = new ClassPathXmlApplicationContext("classpath:/applicationContext*.xml");
- MergeProjects Main = applicationContext.getBean(MergeProjects.class);
- Main.start();
- }
-}
+package com.ossean;
+
+import java.util.List;
+
+import javax.annotation.Resource;
+
+import org.apache.log4j.Logger;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.beans.factory.annotation.Qualifier;
+import org.springframework.context.ApplicationContext;
+import org.springframework.context.support.ClassPathXmlApplicationContext;
+import org.springframework.stereotype.Component;
+
+import com.ossean.databaseSource.DBSource;
+import com.ossean.databaseSource.GatherDao;
+import com.ossean.databaseSource.UpdateControlProjectsDao;
+import com.ossean.model.GatherProjectsModel;
+import com.ossean.util.MergeProjectNew;
+import com.ossean.util.MergeProjectsUtil;
+
+@Component
+public class MergeProjects {
+ Logger logger = Logger.getLogger(this.getClass());
+ @Resource
+ private DBSource dbSource;
+ @Resource
+ private GatherDao gatherDao;
+ @Resource
+ private UpdateControlProjectsDao updateControlDao;
+
+ @Qualifier("mergeProjectNew")
+ @Autowired
+ private MergeProjectNew mergeProjectNew;
+
+ private int startId1;
+
+ private static String pointerTableName = "edd_pointers";
+ private static String sourceTableName = "gather_projects";
+ private static String targetTableName = "edd_relations";
+
+ private static int batchSize = 500;
+
+ //Read the pointer stored for (source, target); if the lookup throws, insert a new pointer starting at minId and return minId
+ public int readPointer(String table, String source, String target, int minId){
+ int pointer = minId;
+ try {
+ pointer = dbSource.getPointer(table, source, target);
+ } catch(Exception e) {
+ logger.info("No such pointer! Create one");
+ dbSource.insertPointer(table, source, target, pointer);
+ }
+ return pointer;
+ }
+
+ public void start(){
+ //int minId = 1;
+ //int maxId = dbSource.getMaxId(sourceTableName);
+ int count=0;
+ count = readPointer(pointerTableName, sourceTableName, targetTableName, count);
+
+ while(true){
+ //startId1 = readPointer(pointerTableName, sourceTableName, targetTableName, minId);
+// if(startId1 > maxId){
+//
+// try {
+// logger.info("没有数据了,休眠10min");
+// Thread.sleep(600*1000L);
+// continue;
+// } catch (InterruptedException e) {
+// e.printStackTrace();
+// }
+// }
+ List gpmList1 = gatherDao.selectGPMList(sourceTableName,batchSize);
+
+ if(gpmList1.size() == 0){
+ try {
+ logger.info("no increase item to be handled! handle update items");
+ List updateProjectList = gatherDao.selectUpdateProjects(sourceTableName,batchSize);
+ if(updateProjectList.size() == 0){
+ logger.info("all update items have been solved. Sleep 10 mins");
+ Thread.sleep(600*1000);
+ continue;
+ }
+ else
+ for(GatherProjectsModel model:updateProjectList){
+ mergeProjectNew.handleNewProject(model, true);
+ gatherDao.updateUpdateMark(sourceTableName, 1, model.getId());
+ }
+
+ } catch (InterruptedException e) {
+ e.printStackTrace();
+ }
+ }
+ long start = System.currentTimeMillis();
+ for(GatherProjectsModel model1:gpmList1){
+ logger.info("handling project : " + model1.getId());
+ mergeProjectNew.handleNewProject(model1, false);
+ count = count+1;
+ dbSource.updatePointer(pointerTableName, sourceTableName, targetTableName, count);
+ gatherDao.updateUpdateMark(sourceTableName, 1, model1.getId()); //set update_mark to 1 once the project has been handled
+ }
+ long end = System.currentTimeMillis();
+ logger.warn("deal with: " + gpmList1.size() + " projects cost: "+(float)(end - start)/60000+" minutes"); //report the real batch size, not a fixed 500
+ }
+ }
+
+
+ public static void main(String[] args){
+ ApplicationContext applicationContext = new ClassPathXmlApplicationContext("classpath:/applicationContext*.xml");
+ MergeProjects Main = applicationContext.getBean(MergeProjects.class);
+ Main.start();
+ }
+}
diff --git a/project_match/src/main/java/com/ossean/MergeProjectsOld.java b/project_match/src/main/java/com/ossean/MergeProjectsOld.java
index a78290f..2454c60 100644
--- a/project_match/src/main/java/com/ossean/MergeProjectsOld.java
+++ b/project_match/src/main/java/com/ossean/MergeProjectsOld.java
@@ -73,7 +73,7 @@ public class MergeProjectsOld {
}else{}
while(true){
startId1 = readPointer(pointerTableName, sourceTableName, edd_relations_pointer, minId);
- List gpmList1 = gatherDao.selectGPMList(sourceTableName, startId1, batchSize, maxId);
+ List gpmList1 = gatherDao.selectGPMList(sourceTableName, batchSize);
if(gpmList1.size() == 0){
try {
logger.info("no increase item to be handled! handle update items");
@@ -92,7 +92,7 @@ public class MergeProjectsOld {
- List updateProjectList = gatherDao.selectUpdateProjects(sourceTableName);
+ List updateProjectList = gatherDao.selectUpdateProjects(sourceTableName, Integer.MAX_VALUE); // DAO now requires a size; no cap keeps the old fetch-all behaviour
for(GatherProjectsModel model:updateProjectList){
mergeProjectNew.handleNewProject(model, true);
- gatherDao.updateUpdateMark(sourceTableName, null, model.getId());
+ gatherDao.updateUpdateMark(sourceTableName, 1, model.getId());
}
logger.info("all update items have been solved. Sleep 1h");
Thread.sleep(3600*1000);
@@ -110,7 +110,7 @@ public class MergeProjectsOld {
//如果是对更新表中的数据进行操作 则不更新指针
dbSource.updatePointer(pointerTableName, sourceTableName, edd_relations_pointer, model1.getId() + 1);
- gatherDao.updateUpdateMark(sourceTableName, null, model1.getId()); //需要更新update_mark字段 因为按顺序取的时候可能取到1的
+ gatherDao.updateUpdateMark(sourceTableName, 1, model1.getId()); //需要更新update_mark字段 因为按顺序取的时候可能取到1的
}
}
}
diff --git a/project_match/src/main/java/com/ossean/databaseDest/DBDest.java b/project_match/src/main/java/com/ossean/databaseDest/DBDest.java
index 992fdce..d4fa82b 100644
--- a/project_match/src/main/java/com/ossean/databaseDest/DBDest.java
+++ b/project_match/src/main/java/com/ossean/databaseDest/DBDest.java
@@ -26,13 +26,13 @@ public interface DBDest {
*/
//向open_source_projects表中插入对象数据
@Insert("insert into ${table} (id,name,description,"
- + "url,url_md5,language,category,"
+ + "url,language,category,"
+ "source,created_time,updated_time,"
- + "tags,tags_for_search,synonyms,license,homepage,extracted_time,update_mark) values (#{model.id},#{model.name},#{model.description},"
- + "#{model.url},#{model.url_md5},#{model.language},"
+ + "tags,tags_for_search,synonyms,update_mark) values (#{model.id},#{model.name},#{model.description},"
+ + "#{model.url},#{model.language},"
+ "#{model.category},#{model.source},"
+ "#{model.created_time},#{model.updated_time},#{model.tags},"
- + "#{model.tags_for_search},#{model.synonyms},#{model.license},#{model.homepage},#{model.extracted_time},#{model.update_mark})")
+ + "#{model.tags_for_search},#{model.synonyms},#{model.update_mark})")
public void insertOsp(@Param("table") String table, @Param("model") OpenSourceProject model);
//查找刚刚插入open_source_projects表中的记录id
diff --git a/project_match/src/main/java/com/ossean/databaseSource/GatherDao.java b/project_match/src/main/java/com/ossean/databaseSource/GatherDao.java
index 7cf9df5..6868e83 100644
--- a/project_match/src/main/java/com/ossean/databaseSource/GatherDao.java
+++ b/project_match/src/main/java/com/ossean/databaseSource/GatherDao.java
@@ -87,8 +87,10 @@ public interface GatherDao {
//根据id和limit取GatherProjectsModel列表
- @Select("select * from ${table} where id>=#{start} and id<=#{maxId} order by id asc limit #{size}")
- public List selectGPMList(@Param("table") String table, @Param("start") int start, @Param("size") int size, @Param("maxId") int maxId);
+ //@Select("select * from ${table} where id>=#{start} and id<=#{maxId} order by id asc limit #{size}")
+ //public List selectGPMList(@Param("table") String table, @Param("start") int start, @Param("size") int size, @Param("maxId") int maxId);
+ @Select("select * from ${table} where update_mark = 0 limit #{size}")
+ public List selectGPMList(@Param("table") String table, @Param("size") int size);
@Select("select * from ${table} where id>=#{start} and id<#{end} limit #{size}")
public List selectGPMListBySize(@Param("table") String table, @Param("start") int start, @Param("end") int end, @Param("size") int size);
@@ -106,12 +108,12 @@ public interface GatherDao {
public List selectGPMBySameName(@Param("table") String table, @Param("name") String name, @Param("id") int id);
- //取出所有update_mark为1的更新数据
- @Select("select * from ${table} where update_mark=1")
- public List selectUpdateProjects(@Param("table") String table);
+ //Fetch at most size rows whose update_mark is 2 (the rows flagged as updated)
+ @Select("select * from ${table} where update_mark=2 limit #{size}")
+ public List selectUpdateProjects(@Param("table") String table,@Param("size") int size);
//更新update_mark字段
@Select("update ${table} set update_mark=#{update_mark} where id=#{id}")
- public void updateUpdateMark(@Param("table") String table, @Param("update_mark") String update_mark, @Param("id") int id);
+ public void updateUpdateMark(@Param("table") String table, @Param("update_mark") int update_mark, @Param("id") int id);
}
\ No newline at end of file
diff --git a/project_match/src/main/java/com/ossean/util/StringHandler.java b/project_match/src/main/java/com/ossean/util/StringHandler.java
index 54d195f..3411d34 100644
--- a/project_match/src/main/java/com/ossean/util/StringHandler.java
+++ b/project_match/src/main/java/com/ossean/util/StringHandler.java
@@ -45,7 +45,7 @@ public class StringHandler {
}
}
resultStrArr = tmp.toArray(new String[0]);
-System.out.println("bangbangbang"+resultStrArr[0]+" "+resultStrArr[1]);
+//System.out.println("bangbangbang"+resultStrArr[0]+" "+resultStrArr[1]);
return resultStrArr;
}
diff --git a/project_match/src/main/resource/log4j.xml b/project_match/src/main/resource/log4j.xml
index dbbef90..a368b6c 100644
--- a/project_match/src/main/resource/log4j.xml
+++ b/project_match/src/main/resource/log4j.xml
@@ -23,6 +23,16 @@
+
+
+
+
+
+
+
+
+
+
@@ -47,13 +57,13 @@
-
-
+
+
\ No newline at end of file
diff --git a/project_match/src/test/java/foo/Testcase.java b/project_match/src/test/java/foo/Testcase.java
deleted file mode 100644
index a065f73..0000000
--- a/project_match/src/test/java/foo/Testcase.java
+++ /dev/null
@@ -1,126 +0,0 @@
-package foo;
-
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Map;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-import javax.annotation.Resource;
-
-import org.apache.log4j.Logger;
-import org.junit.Test;
-import org.junit.runner.RunWith;
-import org.springframework.beans.factory.annotation.Autowired;
-import org.springframework.beans.factory.annotation.Qualifier;
-import org.springframework.context.ApplicationContext;
-import org.springframework.test.context.ContextConfiguration;
-import org.springframework.test.context.junit4.SpringJUnit4ClassRunner;
-
-import com.ossean.databaseSource.DBSource;
-import com.ossean.databaseSource.GatherDao;
-import com.ossean.model.GatherProjectsModel;
-import com.ossean.util.MergeProjectNew;
-import com.ossean.util.MergeProjectsUtil;
-import com.ossean.util.RegexHandler;
-import com.ossean.util.StringHandler_ProjectName;
-
-@RunWith(SpringJUnit4ClassRunner.class)
-@ContextConfiguration(locations = {"classpath*:/applicationContext*.xml"})
-public class Testcase {
-
- @Autowired
- ApplicationContext ctx;
-
- Logger logger = Logger.getLogger(this.getClass());
- @Resource
- private DBSource dbSource;
- @Resource
- private GatherDao gatherDao;
-
- @Qualifier("mergeProjectsUtil")
- @Autowired
- private MergeProjectsUtil mergeProjectsUtil;
-
- @Qualifier("mergeProjectNew")
- @Autowired
- private MergeProjectNew mergeProjectNew;
-
-
- public static void main(String[] args) {
-
-
- }
-
- @Test
- public void testGetSynonyms(){
- List prj_list = gatherDao.selectGPMList("gather_projects_test", 11000, 100, 1135058);
- GatherProjectsModel gpm = gatherDao.selectGPMById("gather_projects", 11085);//1220,1995,1194,11085
- GatherProjectsModel gpm2 = gatherDao.selectGPMById("gather_projects", 237094);
- List list= mergeProjectNew.getSynonyms(gpm);
- String str = "";
- for (int j = 0;j list= mergeProjectNew.getSynonyms(prj_list.get(i));
-//
-// String str = "";
-// for (int j = 0;j synonymList = mergeProjectsUtil.getSynonyms(gpm_2);
-// for (int i =0;i tagsSegmentation(String tags) {
- List tag = new ArrayList();
-
- if (tags != null) {
- String regex = "<[^<>]*>";
- //一个Pattern是一个正则表达式经编译后的表现模式。
- Pattern pattern = Pattern.compile(regex);
- //一个Matcher对象是一个状态机器,它依据Pattern对象做为匹配模式对字符串展开匹配检查。
- Matcher matcher = pattern.matcher(tags);
-
- while (matcher.find()) {
- String t = matcher.group();
- t = t.substring(1, t.length() - 1);
-
- tag.add(t);
- }
- }
- return tag;
- }
-
-}
diff --git a/trustie2/Gemfile b/trustie2/Gemfile
index babf8a2..61f1793 100644
--- a/trustie2/Gemfile
+++ b/trustie2/Gemfile
@@ -1,5 +1,10 @@
-source 'http://ruby.taobao.org/'
+# Gem source: the official rubygems.org index (the HEAD side of the former merge conflict).
+#source 'http://ruby.taobao.org/'
+source 'https://rubygems.org'
+# NOTE(review): the other branch used the taobao mirror below -- confirm which source deployment needs.
+#source 'https://ruby.taobao.org/'
#source 'https://rubygems.org'
+# Conflict markers are not valid Ruby and must never be left in this file.
unless RUBY_PLATFORM =~ /w32/
# unix-like only
diff --git a/trustie2/app/controllers/open_source_projects_controller.rb b/trustie2/app/controllers/open_source_projects_controller.rb
index 8b95594..486f8aa 100644
--- a/trustie2/app/controllers/open_source_projects_controller.rb
+++ b/trustie2/app/controllers/open_source_projects_controller.rb
@@ -60,11 +60,11 @@ class OpenSourceProjectsController < ApplicationController
#@open_source_project.update_column(:view_num_ossean, (@open_source_project.view_num_ossean.to_i+1))
#@open_source_project.save
- @pops=@open_source_project.open_source_project_popularities.where("year_col >= 2013").order("year_col ASC, month_col ASC")
- @hot_words=@open_source_project.hot_words
+ # @pops=@open_source_project.open_source_project_popularities.where("year_col >= 2013").order("year_col ASC, month_col ASC")
+ # @hot_words=@open_source_project.hot_words
@memo = RelativeMemo.new(:open_source_project => @open_source_project)
@topics_count = @open_source_project.relative_memos_num
- @bugs = @open_source_project.relative_bugs.limit(6)
+ # @bugs = @open_source_project.relative_bugs.limit(6)
###########推荐#############################################################
@simility_osp_array ,@simility_id_array,@simility_weight_array ,@simility_osp_des= recommend(params[:id],"simility")
@@ -205,6 +205,32 @@ class OpenSourceProjectsController < ApplicationController
language = ''
language << @language
#search_words << ' ' + @language if @language
+
+ # Ask the starlee correlation service for projects related to the searched name.
+ require 'net/http'
+ require 'json'
+ require 'erb'
+
+ star_projects = []
+ begin
+ # Percent-encode the whole name (not only spaces) so non-ASCII or reserved characters cannot break the URL.
+ url = URI.parse("http://172.16.128.35:5000/correlation_search?q=" + ERB::Util.url_encode(@name.to_s))
+ res = Net::HTTP.start(url.host, url.port) {|http|
+ http.request(Net::HTTP::Get.new(url.to_s))
+ }
+ star_project_ids = (JSON.parse(res.body)["items"] || [])[0,5]
+ # Load the project behind each returned id; ids without a matching record
+ # are skipped so that later code never sees nil entries.
+ star_project_ids.each do |id|
+ project = OpenSourceProject.find_by_id(id.to_i)
+ star_projects << project unless project.nil?
+ end
+ rescue StandardError => e
+ # The correlation result is optional: on any failure fall back to the plain search results.
+ logger.warn("correlation_search failed: #{e.message}")
+ star_projects = []
+ end
+
search = OpenSourceProject.search do
without(:filtration,0)
with(:created_time, Date.new(params[:created_time].to_i, 01, 01)..Date.new(params[:created_time].to_i+1, 01, 01)) if params[:created_time].present? && !('earlier'.eql? params[:created_time])
@@ -240,12 +266,36 @@ class OpenSourceProjectsController < ApplicationController
else
order_by(:score, :desc)
end
- paginate :page => params[:page], :per_page => 10 #the default paginator of solr,10 records for each page
+ paginate :page => params[:page], :per_page => 10 # the default paginator of solr,10 records for each page
end
per_page_option = 10
@hits = search.hits
@open_source_projects = search.results
@projects_count = search.total #get total count of search
+
+ # Remove from the search results every project that the correlation service
+ # already returned, so that no project is listed twice on the page.
+ # nil entries in star_projects (ids without a matching record) are dropped
+ # first, because calling .id on them would raise NoMethodError.
+ star_projects = star_projects.compact
+ star_ids = star_projects.map { |star_project| star_project.id }
+ @open_source_projects.delete_if do |project|
+ project && star_ids.include?(project.id.to_i)
+ end
+
+ # Only the first page is prefixed with the correlated projects.
+ # Request parameters arrive as strings, so the page number is compared after
+ # to_i; comparing the raw parameter with the integer 1 is never true.
+ # A missing page parameter means the first page.
+ first_page = params[:page].nil? || params[:page].to_i == 1
+ if first_page
+ # Prepending in reverse order keeps the service's ranking: its best match
+ # ends up first in the list.
+ star_projects.reverse_each do |star_project|
+ @open_source_projects.unshift(star_project)
+ end
+ end
+
# if @projects_count <= 10
# search_words1 = search_words.unpack("U*").select{ |p| (0x4e00..0x9fa5).member? p }.pack("U*")
# if search_words.include?('客户端')
@@ -271,6 +321,8 @@ class OpenSourceProjectsController < ApplicationController
# @projects_count = search.total + search_1.total
# end
@projects_pages = Paginator.new @projects_count, per_page_option, params['page'] #custom paginator,to show the 10 records the default paginator of solr gets
+
+
respond_to do |format|
format.html { render :layout => "base" }
end
@@ -408,7 +460,8 @@ class OpenSourceProjectsController < ApplicationController
@created_time = params[:created_time]
@projects_count=OpenSourceProject.count
- @posts_count = RelativeMemo.count
+ # @posts_count = RelativeMemo.count
+ @posts_count = 14243800
#@open_source_projects=OpenSourceProject.find_hot_osps(20)
@projects_for_cloud = OpenSourceProject.where("filtration != 0").order("composite_score desc").limit(60)
diff --git a/trustie2/app/helpers/application_helper.rb b/trustie2/app/helpers/application_helper.rb
index a59eca7..d9ca9c5 100644
--- a/trustie2/app/helpers/application_helper.rb
+++ b/trustie2/app/helpers/application_helper.rb
@@ -1689,11 +1689,11 @@ module ApplicationHelper
# nav_list.push(stores_link) if @nav_dispaly_store_all_label
#osp_index
- nav_list.push(osp_index_link) if @nav_dispaly_osp_index_label
+ # nav_list.push(osp_index_link) if @nav_dispaly_osp_index_label
#highchart_index
- nav_list.push(highchart_index_link) if @nav_dispaly_osp_index_label
+ # nav_list.push(highchart_index_link) if @nav_dispaly_osp_index_label
#sta_git_match
- nav_list.push(sta_git_match_link) if @nav_dispaly_osp_index_label
+ # nav_list.push(sta_git_match_link) if @nav_dispaly_osp_index_label
content_li = ''
nav_list.collect do |nav_item|
diff --git a/trustie2/app/models/open_source_project.rb b/trustie2/app/models/open_source_project.rb
index 04e73b0..5c12fb4 100644
--- a/trustie2/app/models/open_source_project.rb
+++ b/trustie2/app/models/open_source_project.rb
@@ -246,12 +246,12 @@ class OpenSourceProject < ActiveRecord::Base
#tag_ids = ActsAsTaggableOn::Tagging.select(:tag_id).where("taggable_id = ? AND taggable_type = ? AND disagree_num > ?", obj_id, "OpenSourceProject", 5)
# ActsAsTaggableOn::Tag.includes(ActsAsTaggableOn::Tagging).where("taggable_id = ? AND taggable_type = ? AND disagree_num > ?", obj_id, "OpenSourceProject", -1)
#tag_list = ActsAsTaggableOn::Tag.select(:name).where(id:tag_ids)
- tag_list = ActsAsTaggableOn::Tag.find_by_sql ["SELECT `tags`.* FROM `tags`"+
- " INNER JOIN `taggings` ON `tags`.`id` = `taggings`.`tag_id` WHERE"+
- "`taggings`.`taggable_id` = #{self.id}"+
- " AND `taggings`.`taggable_type` = 'OpenSourceProject' AND `taggings`.`disagree_num` > 5" +
- " AND (taggings.context = 'tags' AND taggings.tagger_id IS NULL) LIMIT #{limit}"]
- return tag_list
+ # tag_list = ActsAsTaggableOn::Tag.find_by_sql ["SELECT `tags`.* FROM `tags`"+
+ # " INNER JOIN `taggings` ON `tags`.`id` = `taggings`.`tag_id` WHERE"+
+ # "`taggings`.`taggable_id` = #{self.id}"+
+ # " AND `taggings`.`taggable_type` = 'OpenSourceProject' AND `taggings`.`disagree_num` > 5" +
+ # " AND (taggings.context = 'tags' AND taggings.tagger_id IS NULL) LIMIT #{limit}"]
+ # return tag_list
end
def get_tag_list_userTag(limit = 9)
diff --git a/trustie2/app/views/layouts/_base_footer.html.erb b/trustie2/app/views/layouts/_base_footer.html.erb
index 83737e6..1ae371d 100644
--- a/trustie2/app/views/layouts/_base_footer.html.erb
+++ b/trustie2/app/views/layouts/_base_footer.html.erb
@@ -18,7 +18,7 @@
"http://www.miibeian.gov.cn/", :target => "_blank" %>
-
+
diff --git a/trustie2/app/views/layouts/_base_header.html.erb b/trustie2/app/views/layouts/_base_header.html.erb
index 1b7cb83..bed0db1 100644
--- a/trustie2/app/views/layouts/_base_header.html.erb
+++ b/trustie2/app/views/layouts/_base_header.html.erb
@@ -75,7 +75,7 @@
<% end -%>
<%#= render_menu :top_menu if User.current.logged? || !Setting.login_required? -%>
- <%= render_dynamic_nav if User.current.logged? || !Setting.login_required? -%>
+ <%#= render_dynamic_nav if User.current.logged? || !Setting.login_required? -%>
<%# 自建导航条在base页面中以 (@nav_dispaly......) 开头变量设定, 全局搜索即可发现 %>
diff --git a/trustie2/app/views/layouts/_base_welcome_header.html.erb b/trustie2/app/views/layouts/_base_welcome_header.html.erb
index 7720c12..8f6266b 100644
--- a/trustie2/app/views/layouts/_base_welcome_header.html.erb
+++ b/trustie2/app/views/layouts/_base_welcome_header.html.erb
@@ -52,7 +52,7 @@
- <%= link_to image_tag("/images/logo.png", weight: "36px", height: "36px"), home_path %>
+
<% if User.current.logged? -%>
@@ -75,7 +75,7 @@
<% end -%>
<%#= render_menu :top_menu if User.current.logged? || !Setting.login_required? -%>
- <%= render_dynamic_nav if User.current.logged? || !Setting.login_required? -%>
+ <%#= render_dynamic_nav if User.current.logged? || !Setting.login_required? -%>
<%# 自建导航条在base页面中以 (@nav_dispaly......) 开头变量设定, 全局搜索即可发现 %>
diff --git a/trustie2/app/views/open_source_projects/_os_project.html.erb b/trustie2/app/views/open_source_projects/_os_project.html.erb
index 226d2cd..5fa3f26 100644
--- a/trustie2/app/views/open_source_projects/_os_project.html.erb
+++ b/trustie2/app/views/open_source_projects/_os_project.html.erb
@@ -24,9 +24,9 @@
- <%= (project.relative_memos_num.nil?)?0:project.relative_memos_num.to_i %>
+
- <%= content_tag('span', l(:label_x_relative_topics_count, :count => 0)) %>
+
@@ -35,10 +35,10 @@
- <%= short_num (project.view_num_ossean.nil?)?0:project.view_num_ossean %>
+
<%#= short_num ((project.view_num.nil?)?0:project.view_num)+project.view_num_ossean+project.relative_topics.sum('view_num') %>
- <%= content_tag('span', l(:label_view_num)) %>
+ <%#= content_tag('span', l(:label_view_num)) %>
diff --git a/trustie2/app/views/open_source_projects/_os_project_search_list.html.erb b/trustie2/app/views/open_source_projects/_os_project_search_list.html.erb
index 89b927c..98567a5 100644
--- a/trustie2/app/views/open_source_projects/_os_project_search_list.html.erb
+++ b/trustie2/app/views/open_source_projects/_os_project_search_list.html.erb
@@ -17,16 +17,16 @@
<%#= textilizable(project.short_description, :project => project) %>
- <%= show_project_descriptions project.result.description, 300 %>
+ <%= show_project_descriptions project.description, 300 %>
<%#= hidden_field_tag 'project_type', project_type %>
- <%= submit_tag l(:label_search_topics), :class => "enterprise", :name => nil, :onclick => "user_trace_search_knowledge_in_search_bar('#{request.session_options[:id]}');" %>
+ <%#= submit_tag l(:label_search_topics), :class => "enterprise", :name => nil, :onclick => "user_trace_search_knowledge_in_search_bar('#{request.session_options[:id]}');" %>
<% end %>
<%###################search topics###########################%>
diff --git a/trustie2/app/views/open_source_projects/search.html.erb b/trustie2/app/views/open_source_projects/search.html.erb
index 60d5ba5..521fa66 100644
--- a/trustie2/app/views/open_source_projects/search.html.erb
+++ b/trustie2/app/views/open_source_projects/search.html.erb
@@ -207,15 +207,33 @@
- - 查找条件>
+
+ <%#= show_condition(@app_dir, @language, @created_time, params[:name]) %>
+
- 共 <%= @projects_count %> 个开源项目
+
+ - 开发语言:
+ <%= link_to "JAVA", search_open_source_projects_path(:language => "JAVA", :app_dir => @app_dir, :created_time => @created_time, :name => params[:name]), :class => "nav-more J_More show" %>
+ <%= link_to "C++", search_open_source_projects_path(:language => "c++", :app_dir => @app_dir, :created_time => @created_time, :name => params[:name]), :class => "nav-more J_More show" %>
+ <%= link_to "C", search_open_source_projects_path(:language => "c", :app_dir => @app_dir, :created_time => @created_time, :name => params[:name]), :class => "nav-more J_More show" %>
+ <%= link_to "C#", search_open_source_projects_path(:language => "c#", :app_dir => @app_dir, :created_time => @created_time, :name => params[:name]), :class => "nav-more J_More show" %>
+ <%= link_to "ruby", search_open_source_projects_path(:language => "ruby", :app_dir => @app_dir, :created_time => @created_time, :name => params[:name]), :class => "nav-more J_More show" %>
+
+
+ - 创立时间:
+ <%= link_to "2014", search_open_source_projects_path(:app_dir => @app_dir, :language => @language, :created_time => "2014", :name => params[:name]), :class => "nav-more J_More show" %>
+ <%= link_to "2013", search_open_source_projects_path(:app_dir => @app_dir, :language => @language, :created_time => "2013", :name => params[:name]), :class => "nav-more J_More show" %>
+ <%= link_to "2012", search_open_source_projects_path(:app_dir => @app_dir, :language => @language, :created_time => "2012", :name => params[:name]), :class => "nav-more J_More show" %>
+ <%= link_to "2011", search_open_source_projects_path(:app_dir => @app_dir, :language => @language, :created_time => "2011", :name => params[:name]), :class => "nav-more J_More show" %>
+ <%= link_to "2010", search_open_source_projects_path(:app_dir => @app_dir, :language => @language, :created_time => "2010", :name => params[:name]), :class => "nav-more J_More show" %>
+ <%= link_to "更早", search_open_source_projects_path(:app_dir => @app_dir, :language => @language, :created_time => "earlier", :name => params[:name]), :class => "nav-more J_More show" %>
+
+
@@ -237,7 +255,7 @@
-->
-
@@ -285,12 +304,12 @@
- <% for i in (0..@hits.length-1) %>
- <% project = @hits[i] %>
+ <% for i in (0..@open_source_projects.length-1) %>
+ <% project = @open_source_projects[i] %>
<% if !project.nil? %>
-
- <%= link_to project.result.name, open_source_project_path(project.result), :class => "project root leaf", :onclick => "user_trace_click_project_in_project_list_after_search(this,#{i+1},'#{request.session_options[:id]}');" %>
+ <%= link_to project.name, open_source_project_path(project), :class => "project root leaf" %>
<%= render :partial => 'open_source_projects/os_project_search_list', :locals => {:project => project, :i => i} %>
diff --git a/trustie2/app/views/open_source_projects/show.html.erb b/trustie2/app/views/open_source_projects/show.html.erb
index 16a82f6..bddca2d 100644
--- a/trustie2/app/views/open_source_projects/show.html.erb
+++ b/trustie2/app/views/open_source_projects/show.html.erb
@@ -96,14 +96,14 @@
-
+
diff --git a/trustie2/app/views/open_source_projects/show_analysis_tab.html.erb b/trustie2/app/views/open_source_projects/show_analysis_tab.html.erb
index 9987aa9..64c2038 100644
--- a/trustie2/app/views/open_source_projects/show_analysis_tab.html.erb
+++ b/trustie2/app/views/open_source_projects/show_analysis_tab.html.erb
@@ -1,21 +1,21 @@
-
职位需求情况
+
-
薪资情况
+
工作地点分析
-
+
-
工作经验情况
+
工作经验分析
-
相关软件情况
+
相关软件分析
@@ -385,7 +385,12 @@
text: null
},
xAxis: {
- categories: categories
+ categories: categories,
+ labels: {
+ formatter: function() { // cut axis labels longer than 20 characters down to 20 and append "..."
+ return(this.value.length>20?this.value.substring(0,20) + "...":this.value);
+ }
+ }
},
yAxis: {
title: {
@@ -465,6 +470,7 @@
'<%=osp.name.to_s %>',
<% end %>
],
+
data = [
<% for i in 0..@chart8_osp.length-1 %>
{
@@ -507,7 +513,12 @@
text: null
},
xAxis: {
- categories: categories
+ categories: categories,
+ labels: {
+ formatter: function() { // cut axis labels longer than 20 characters down to 20 and append "..."
+ return(this.value.length>20?this.value.substring(0,20) + "...":this.value);
+ }
+ }
},
yAxis: {
title: {
diff --git a/trustie2/app/views/open_source_projects/welcome.html.erb b/trustie2/app/views/open_source_projects/welcome.html.erb
index b170cfd..a6fce1b 100644
--- a/trustie2/app/views/open_source_projects/welcome.html.erb
+++ b/trustie2/app/views/open_source_projects/welcome.html.erb
@@ -282,7 +282,7 @@
<%###########################访问量统计代码##############################%>
diff --git a/trustie2/app/views/relative_memos/search.html.erb b/trustie2/app/views/relative_memos/search.html.erb
index e4fa16a..3ca5c13 100644
--- a/trustie2/app/views/relative_memos/search.html.erb
+++ b/trustie2/app/views/relative_memos/search.html.erb
@@ -266,9 +266,9 @@
- <%= topic.view_num+topic.view_num_ossean %>
+
- <%= content_tag('span', l(:label_view_num)) %>
+
diff --git a/trustie2/app/views/tags/_tag_name.html.erb b/trustie2/app/views/tags/_tag_name.html.erb
index a3e35a6..4a53437 100644
--- a/trustie2/app/views/tags/_tag_name.html.erb
+++ b/trustie2/app/views/tags/_tag_name.html.erb
@@ -7,18 +7,21 @@
// });
// })
function reload_tags(){
- <% @tags = obj.reload.tag_list %>
+ <% #@tags = obj.reload.tag_list %>
+ <% @tags = obj.attributes["tags"] # NOTE(review): this ERB runs once while the template renders, not when reload_tags() is called in the browser
+ if @tags != nil && @tags.length>2 # drop the first and last character, then split on ">,<" (presumably stored as "<a>,<b>" - confirm)
+ @tags = @tags[1,@tags.length-2].split(">,<")
+ end %>
}
-<% if object_flag == '8'%>
-<%# @tags = obj.get_tag_list%>
-<% @tags = obj.tags.limit(9)%>
-<% elsif object_flag == '10' %>
-<% @tags = obj.AllTags.limit(9) %>
-<% else %>
-<% @tags = obj.reload.tag_list %>
-<% end%>
+<% #@tags = obj.get_tag_list%>
+<% raw_tags = obj.attributes["tags"].to_s
+ # Drop the first and last character and split on ">,<" (presumably stored as "<a>,<b>" - confirm).
+ # A missing or too-short value yields [] so @tags is always an Array, never nil or a raw String.
+ parsed_tags = raw_tags.length > 2 ? raw_tags[1, raw_tags.length - 2].split(">,<") : []
+ # Keep at most the first nine tags.
+ @tags = parsed_tags[0, 9] %>
<% if non_list_all and (@tags.size > 0) %>
diff --git a/trustie2/config/database.yml.example b/trustie2/config/database.yml.example
deleted file mode 100644
index 20db70b..0000000
--- a/trustie2/config/database.yml.example
+++ /dev/null
@@ -1,53 +0,0 @@
-source ~/.rvm/scripts/rvm# Default setup is given for MySQL with ruby1.9. If you're running Redmine
-source ~/.rvm/scripts/rvm# Default setup is given for MySQL with ruby1.9. If you're running Redmine
-# with MySQL and ruby1.8, replace the adapter name with `mysql`.
-# Examples for PostgreSQL, SQLite3 and SQL Server can be found at the end.
-# Line indentation must be 2 spaces (no tabs).
-
-production:
- adapter: mysql2
- database: redmine
- host: localhost
- username: root
- password: ""
- encoding: utf8
-
-development:
- adapter: mysql2
- database: redmine_development
- host: 10.107.17.20
- username: root
- password: "1234"
- encoding: utf8
-
-# Warning: The database defined as "test" will be erased and
-# re-generated from your development database when you run "rake".
-# Do not set this db to the same as development or production.
-test:
- adapter: mysql2
- database: redmine_test
- host: 10.107.17.20
- username: root
- password: "1234"
- encoding: utf8
-
-# PostgreSQL configuration example
-#production:
-# adapter: postgresql
-# database: redmine
-# host: localhost
-# username: postgres
-# password: "postgres"
-
-# SQLite3 configuration example
-#production:
-# adapter: sqlite3
-# database: db/redmine.sqlite3
-
-# SQL Server configuration example
-#production:
-# adapter: sqlserver
-# database: redmine
-# host: localhost
-# username: jenkins
-# password: jenkins
diff --git a/trustie2/config/puma.rb b/trustie2/config/puma.rb
new file mode 100644
index 0000000..c79f9e1
--- /dev/null
+++ b/trustie2/config/puma.rb
@@ -0,0 +1,7 @@
+environment "production" # run the application in the production environment
+bind "unix:///var/run/ossean.sock" # listen on a Unix domain socket instead of a TCP port
+restart_command '/var/www/ossean/trustie2/restart_puma' # command Puma uses to restart itself
+daemonize true # run in the background; NOTE(review): option was removed in Puma 5 - confirm the installed version
+pidfile "/var/tmp/pids/puma.pid" # file the server PID is written to
+#stdout_redirect "/var/www/ossean/trustie2/log/stdout", "/var/www/ossean/trustie2/log/stderr"
+
diff --git a/trustie2/db/migrate/20161103014905_add_index_to_relative_memo_to_open_source_projects_tables.rb b/trustie2/db/migrate/20161103014905_add_index_to_relative_memo_to_open_source_projects_tables.rb
new file mode 100644
index 0000000..3a14750
--- /dev/null
+++ b/trustie2/db/migrate/20161103014905_add_index_to_relative_memo_to_open_source_projects_tables.rb
@@ -0,0 +1,10 @@
+class AddIndexToRelativeMemoToOpenSourceProjectsTables < ActiveRecord::Migration
+ def change
+ arr = (1..70).to_a
+ arr.each do |letter|
+ add_index "relative_memo_to_open_source_projects_#{letter}".to_sym, :osp_id, :name => "osp_id_#{letter}"
+ add_index "relative_memo_to_open_source_projects_#{letter}".to_sym, :memo_type, :name => "memo_type_#{letter}"
+ add_index "relative_memo_to_open_source_projects_#{letter}".to_sym, :created_time, :name => "created_time_#{letter}"
+ end
+ end
+end
diff --git a/trustie2/public/images/avatars/Project/no_profile.png b/trustie2/public/images/avatars/Project/no_profile.png
new file mode 100644
index 0000000..7640e01
Binary files /dev/null and b/trustie2/public/images/avatars/Project/no_profile.png differ