ProjectsFilter was improved; updated the email settings in log4j.xml

This commit is contained in:
zhanyun 2016-04-15 10:58:05 +08:00
parent ec78bb6304
commit 0a6f506931
7 changed files with 182 additions and 175 deletions

View File

@ -9,22 +9,6 @@
</layout>
</appender>
<appender name="file" class="org.apache.log4j.DailyRollingFileAppender">
<param name="File" value="./log/error.log" />
<param name="threshold" value="ERROR" />
<layout class="org.apache.log4j.PatternLayout">
<param name="ConversionPattern" value="%d{yy-MM-dd HH:mm:ss,SSS} %-5p %c(%F:%L) ## %m%n" />
</layout>
</appender>
<appender name="file_log" class="org.apache.log4j.DailyRollingFileAppender">
<param name="File" value="./log/webmagic.log" />
<layout class="org.apache.log4j.PatternLayout">
<param name="ConversionPattern" value="%d{yy-MM-dd HH:mm:ss,SSS} %-5p %c(%F:%L) ## %m%n" />
</layout>
</appender>
<!-- Email is sent only when an ERROR-level event occurs -->
<appender name="MAIL" class="org.apache.log4j.net.SMTPAppender">
<param name="threshold" value="debug" />
@ -35,7 +19,7 @@
<param name="From" value="ossean_debug@163.com" />
<param name="SMTPHost" value="smtp.163.com" />
<param name="Subject" value="ossean-crawler-debug-log4jMessage" />
<param name="To" value="gcm3651@126.com" />
<param name="To" value="cloud_zhan@163.com" />
<param name="SMTPUsername" value="ossean_debug" />
<param name="SMTPPassword" value="goodwell123" />
<layout class="org.apache.log4j.PatternLayout">

View File

@ -9,22 +9,6 @@
</layout>
</appender>
<appender name="file" class="org.apache.log4j.DailyRollingFileAppender">
<param name="File" value="./log/error.log" />
<param name="threshold" value="ERROR" />
<layout class="org.apache.log4j.PatternLayout">
<param name="ConversionPattern" value="%d{yy-MM-dd HH:mm:ss,SSS} %-5p %c(%F:%L) ## %m%n" />
</layout>
</appender>
<appender name="file_log" class="org.apache.log4j.DailyRollingFileAppender">
<param name="File" value="./log/webmagic.log" />
<layout class="org.apache.log4j.PatternLayout">
<param name="ConversionPattern" value="%d{yy-MM-dd HH:mm:ss,SSS} %-5p %c(%F:%L) ## %m%n" />
</layout>
</appender>
<!-- Email is sent only when an ERROR-level event occurs -->
<appender name="MAIL" class="org.apache.log4j.net.SMTPAppender">
<param name="threshold" value="debug" />
@ -35,7 +19,7 @@
<param name="From" value="ossean_debug@163.com" />
<param name="SMTPHost" value="smtp.163.com" />
<param name="Subject" value="ossean-crawler-debug-log4jMessage" />
<param name="To" value="gcm3651@126.com" />
<param name="To" value="cloud_zhan@163.com" />
<param name="SMTPUsername" value="ossean_debug" />
<param name="SMTPPassword" value="goodwell123" />
<layout class="org.apache.log4j.PatternLayout">

View File

@ -16,7 +16,7 @@
<param name="Subject" value="this is test" />
<param name="SMTPUsername" value="gcm365111@126.com" />
<param name="SMTPPassword" value="03023651gcm" />
<param name="to" value="gcm3651@126.com" />
<param name="to" value="cloud_zhan@163.com" />
<layout class="org.apache.log4j.PatternLayout">
<param name="ConversionPattern" value="%d{yy-MM-dd HH:mm:ss,SSS} %-5p %c(%F:%L) ## %m%n" />
</layout>

View File

@ -212,40 +212,48 @@ public class HotwordsMain {
* @param osp_id
* @return
*/
public static String getTargetTable(int osp_id){
public static String getTargetTable(int ospId){
String targetTableName = "";
if (osp_id < 500) {
targetTableName = "relative_memo_to_open_source_projects_1";
}
else if (osp_id >= 500 && osp_id < 1000) {
targetTableName = "relative_memo_to_open_source_projects_2";
}
else if (osp_id >= 1000 && osp_id < 1500) {
targetTableName = "relative_memo_to_open_source_projects_3";
}
else if (osp_id >= 1500 && osp_id < 2000) {
targetTableName = "relative_memo_to_open_source_projects_4";
}
else if (osp_id >= 2000 && osp_id < 3000) {
targetTableName = "relative_memo_to_open_source_projects_5";
}
else if (osp_id >= 3000 && osp_id < 5000) {
targetTableName = "relative_memo_to_open_source_projects_6";
}
else if (osp_id >= 5000 && osp_id < 7500) {
targetTableName = "relative_memo_to_open_source_projects_7";
}
else if (osp_id >= 7500 && osp_id < 10000) {
targetTableName = "relative_memo_to_open_source_projects_8";
}
else if (osp_id >= 10000 && osp_id < 310000) {
int a = 7 + osp_id/5000;
targetTableName = "relative_memo_to_open_source_projects_" + a;
}
else if (osp_id >= 310000) {
if(ospId >= 770000){
targetTableName = "relative_memo_to_open_source_projects_70";
}
else{
int a = 1 + ospId/11000;
targetTableName = "relative_memo_to_open_source_projects_" + a;
}
// if (osp_id < 500) {
// targetTableName = "relative_memo_to_open_source_projects_1";
// }
// else if (osp_id >= 500 && osp_id < 1000) {
// targetTableName = "relative_memo_to_open_source_projects_2";
// }
// else if (osp_id >= 1000 && osp_id < 1500) {
// targetTableName = "relative_memo_to_open_source_projects_3";
// }
// else if (osp_id >= 1500 && osp_id < 2000) {
// targetTableName = "relative_memo_to_open_source_projects_4";
// }
// else if (osp_id >= 2000 && osp_id < 3000) {
// targetTableName = "relative_memo_to_open_source_projects_5";
// }
// else if (osp_id >= 3000 && osp_id < 5000) {
// targetTableName = "relative_memo_to_open_source_projects_6";
// }
// else if (osp_id >= 5000 && osp_id < 7500) {
// targetTableName = "relative_memo_to_open_source_projects_7";
// }
// else if (osp_id >= 7500 && osp_id < 10000) {
// targetTableName = "relative_memo_to_open_source_projects_8";
// }
// else if (osp_id >= 10000 && osp_id < 310000) {
// int a = 7 + osp_id/5000;
// targetTableName = "relative_memo_to_open_source_projects_" + a;
// }
// else if (osp_id >= 310000) {
// targetTableName = "relative_memo_to_open_source_projects_70";
// }
return targetTableName;
//return "relative_memo_to_open_source_projects";
}
public static void main(String[] args){

View File

@ -9,6 +9,10 @@ import org.apache.ibatis.annotations.Update;
import com.ossean.projectmanager.model.OpenSourceProject;
public interface OpenSourceProjectDao {
// Get the largest project id
@Select("select MAX(id) from open_source_projects")
public Integer getNewLast();
// Read a given number of project records
@Select("select * from open_source_projects where id>=#{start} limit #{size}")

View File

@ -5,9 +5,11 @@ import java.util.List;
import javax.annotation.Resource;
import org.apache.commons.lang3.StringUtils;
import org.apache.log4j.Logger;
import org.springframework.stereotype.Component;
import com.ossean.projectmanager.lasttabledao.OpenSourceProjectDao;
import com.ossean.projectmanager.lasttabledao.PointersDao;
import com.ossean.projectmanager.lasttabledao.RelativeMemoToOpenSourceProjectDao;
import com.ossean.projectmanager.model.OpenhubProject;
import com.ossean.projectmanager.model.OpenSourceProject;
@ -18,122 +20,163 @@ import com.ossean.projectmanager.parttabledao.PartProjectDao;
public class ProjectsFilter {
@Resource
private OpenSourceProjectDao lastProjectDao;
@Resource
private PartProjectDao partProjectDao;
@Resource
private RelativeMemoToOpenSourceProjectDao matchResultDao;
@Resource
private PointersDao pointersDao;
Logger logger = Logger.getLogger(this.getClass());
/**
* Filter the master project table using the fields specific to each source community
*
* @throws InterruptedException
*/
public void filtratePrjs() {
//
List<OpenSourceProject> prjsList = lastProjectDao.getBatchPrjs(10000000);
for(OpenSourceProject project : prjsList){
String prjUrl = project.getUrl();
String source = "";
String url="";
if (prjUrl == null || "".equals(prjUrl)){
lastProjectDao.updateFiltratedPrj(project.getId(), 0);
continue;
}
if(prjUrl.contains("|,|")){ //即url中包含多个项目来源
String firstUrl = StringUtils.splitByWholeSeparator(prjUrl, "|,|")[0];//只对第一个即去重时保留的最热的项目来源做筛选
source = StringUtils.splitByWholeSeparator(firstUrl, "|:|")[0]; //从url字段中取得第一个来源社区
url = StringUtils.splitByWholeSeparator(firstUrl, "|:|")[1]; //获得第一个url
}
else{ //url只有一个项目来源
source = StringUtils.splitByWholeSeparator(prjUrl, "|:|")[0];
url = StringUtils.splitByWholeSeparator(prjUrl, "|:|")[1];
}
if(source.equals("OpenHub")){
OpenhubProject openhubProject = partProjectDao.getOpenHubPrjByUrl(url); //根据url从openhub的项目分表获得项目信息
if(openhubProject.getName()!=null&&openhubProject.getName()!=""&&
openhubProject.getDescription()!=null&&openhubProject.getDescription()!=""&&
!openhubProject.getCodeLocation().contains("add a code location")){ //openhub的筛选条件为namedescription不为空且该项目有版本库
if(project.getFilration()==0){
lastProjectDao.updateFiltratedPrj(project.getId(), 1); //筛选标识从0变为1表示该项目经过筛选新增的
matchResultDao.deleteMatchResult(getTargetTable(project.getId()), project.getId()); //删除该项目的匹配结果确保无之前的匹配结果
while (true) {
logger.info("reading projects......");
List<OpenSourceProject> prjsList = lastProjectDao
.getBatchPrjs(10000000);
for (OpenSourceProject project : prjsList) {
String prjUrl = project.getUrl();
String source = "";
String url = "";
if (prjUrl == null || "".equals(prjUrl)) {
lastProjectDao.updateFiltratedPrj(project.getId(), 0);
continue;
}
if (prjUrl.contains("|,|")) { // 即url中包含多个项目来源
String firstUrl = StringUtils.splitByWholeSeparator(prjUrl,
"|,|")[0];// 只对第一个即去重时保留的最热的项目来源做筛选
source = StringUtils.splitByWholeSeparator(firstUrl, "|:|")[0]; // 从url字段中取得第一个来源社区
url = StringUtils.splitByWholeSeparator(firstUrl, "|:|")[1]; // 获得第一个url
} else { // url只有一个项目来源
source = StringUtils.splitByWholeSeparator(prjUrl, "|:|")[0];
url = StringUtils.splitByWholeSeparator(prjUrl, "|:|")[1];
}
if (source.equals("OpenHub")) {
OpenhubProject openhubProject = partProjectDao
.getOpenHubPrjByUrl(url); // 根据url从openhub的项目分表获得项目信息
if (openhubProject.getName() != null
&& openhubProject.getName() != ""
&& openhubProject.getDescription() != null
&& openhubProject.getDescription() != ""
&& !openhubProject.getCodeLocation().contains(
"add a code location")) { // openhub的筛选条件为namedescription不为空且该项目有版本库
if (project.getFilration() == 0) {
lastProjectDao.updateFiltratedPrj(project.getId(),
1); // 筛选标识从0变为1表示该项目经过筛选新增的
matchResultDao.deleteMatchResult(
getTargetTable(project.getId()),
project.getId()); // 删除该项目的匹配结果确保无之前的匹配结果
} else {
lastProjectDao.updateFiltratedPrj(project.getId(),
2); // 筛选标识由1或2变为2表示该项目之前就是筛选作为保留的
}
} else {
lastProjectDao.updateFiltratedPrj(project.getId(), 0); // 筛选标识变为0表示该项目不保留
matchResultDao.deleteMatchResult(
getTargetTable(project.getId()),
project.getId()); // 删除该项目的匹配结果
}
else{
lastProjectDao.updateFiltratedPrj(project.getId(), 2); //筛选标识由1或2变为2表示该项目之前就是筛选作为保留的
} else if (source.equals("SourceForge")) {
SourceForgeProject sourceforgeProject = partProjectDao
.getSourceForgePrjByUrl(url); // 根据url从SourceForge的项目分表获得项目信息
if (sourceforgeProject.getName() != null
&& sourceforgeProject.getName() != ""
&& sourceforgeProject.getDescription() != null
&& sourceforgeProject.getDescription() != ""
&& ((sourceforgeProject.getDownload_num() > 0) || (sourceforgeProject
.getStars() > 0))) {
if (project.getFilration() == 0) {
lastProjectDao.updateFiltratedPrj(project.getId(),
1); // 筛选标识从0变为1表示该项目经过筛选新增的
matchResultDao.deleteMatchResult(
getTargetTable(project.getId()),
project.getId()); // 删除该项目的匹配结果确保无之前的匹配结果
} else {
lastProjectDao.updateFiltratedPrj(project.getId(),
2); // 筛选标识由1或2变为2表示该项目之前就是筛选作为保留的
}
} else {
lastProjectDao.updateFiltratedPrj(project.getId(), 0); // 筛选标识变为0表示该项目不保留
matchResultDao.deleteMatchResult(
getTargetTable(project.getId()),
project.getId()); // 删除该项目的匹配结果
}
} else if (source.equals("OSChina") || source.equals("Apache")) {
if (project.getFilration() == 0) {
lastProjectDao.updateFiltratedPrj(project.getId(), 1); // 筛选标识从0变为1表示该项目经过筛选新增的
matchResultDao.deleteMatchResult(
getTargetTable(project.getId()),
project.getId()); // 删除该项目的匹配结果确保无之前的匹配结果
} else {
lastProjectDao.updateFiltratedPrj(project.getId(), 2); // 筛选标识由1或2变为2表示该项目之前就是筛选作为保留的
}
}
else{
lastProjectDao.updateFiltratedPrj(project.getId(), 0); //筛选标识变为0表示该项目不保留
matchResultDao.deleteMatchResult(getTargetTable(project.getId()), project.getId()); //删除该项目的匹配结果
}
}
else if(source.equals("SourceForge")){
SourceForgeProject sourceforgeProject = partProjectDao.getSourceForgePrjByUrl(url); //根据url从SourceForge的项目分表获得项目信息
if(sourceforgeProject.getName()!=null&&sourceforgeProject.getName()!=""&&
sourceforgeProject.getDescription()!=null&&sourceforgeProject.getDescription()!=""&&
((sourceforgeProject.getDownload_num()>0) || (sourceforgeProject.getStars()>0))){
if(project.getFilration()==0){
lastProjectDao.updateFiltratedPrj(project.getId(), 1); //筛选标识从0变为1表示该项目经过筛选新增的
matchResultDao.deleteMatchResult(getTargetTable(project.getId()), project.getId()); //删除该项目的匹配结果确保无之前的匹配结果
}
else{
lastProjectDao.updateFiltratedPrj(project.getId(), 2); //筛选标识由1或2变为2表示该项目之前就是筛选作为保留的
}
}
else{
lastProjectDao.updateFiltratedPrj(project.getId(), 0); //筛选标识变为0表示该项目不保留
matchResultDao.deleteMatchResult(getTargetTable(project.getId()), project.getId()); //删除该项目的匹配结果
}
}
else{
if(project.getFilration()==0){
lastProjectDao.updateFiltratedPrj(project.getId(), 1); //筛选标识从0变为1表示该项目经过筛选新增的
matchResultDao.deleteMatchResult(getTargetTable(project.getId()), project.getId()); //删除该项目的匹配结果确保无之前的匹配结果
}
else{
lastProjectDao.updateFiltratedPrj(project.getId(), 2); //筛选标识由1或2变为2表示该项目之前就是筛选作为保留的
else {
logger.info("Unknown source... source = " + source);
}
}
logger.info("Filter done......sleeping......");
try {
Thread.sleep(60000000);// 一次筛选完成休息
} catch (InterruptedException e) {
logger.error(e);
}
}
}
/**
* get the match result table's name
*
* @param osp_id
* @return
*/
public static String getTargetTable(int osp_id){
public static String getTargetTable(int ospId) {
String targetTableName = "";
if (osp_id < 500) {
targetTableName = "relative_memo_to_open_source_projects_1";
}
else if (osp_id >= 500 && osp_id < 1000) {
targetTableName = "relative_memo_to_open_source_projects_2";
}
else if (osp_id >= 1000 && osp_id < 1500) {
targetTableName = "relative_memo_to_open_source_projects_3";
}
else if (osp_id >= 1500 && osp_id < 2000) {
targetTableName = "relative_memo_to_open_source_projects_4";
}
else if (osp_id >= 2000 && osp_id < 3000) {
targetTableName = "relative_memo_to_open_source_projects_5";
}
else if (osp_id >= 3000 && osp_id < 5000) {
targetTableName = "relative_memo_to_open_source_projects_6";
}
else if (osp_id >= 5000 && osp_id < 7500) {
targetTableName = "relative_memo_to_open_source_projects_7";
}
else if (osp_id >= 7500 && osp_id < 10000) {
targetTableName = "relative_memo_to_open_source_projects_8";
}
else if (osp_id >= 10000 && osp_id < 310000) {
int a = 7 + osp_id/5000;
if (ospId >= 770000) {
targetTableName = "relative_memo_to_open_source_projects_70";
} else {
int a = 1 + ospId / 11000;
targetTableName = "relative_memo_to_open_source_projects_" + a;
}
else if (osp_id >= 310000) {
targetTableName = "relative_memo_to_open_source_projects_70";
}
// if (osp_id < 500) {
// targetTableName = "relative_memo_to_open_source_projects_1";
// }
// else if (osp_id >= 500 && osp_id < 1000) {
// targetTableName = "relative_memo_to_open_source_projects_2";
// }
// else if (osp_id >= 1000 && osp_id < 1500) {
// targetTableName = "relative_memo_to_open_source_projects_3";
// }
// else if (osp_id >= 1500 && osp_id < 2000) {
// targetTableName = "relative_memo_to_open_source_projects_4";
// }
// else if (osp_id >= 2000 && osp_id < 3000) {
// targetTableName = "relative_memo_to_open_source_projects_5";
// }
// else if (osp_id >= 3000 && osp_id < 5000) {
// targetTableName = "relative_memo_to_open_source_projects_6";
// }
// else if (osp_id >= 5000 && osp_id < 7500) {
// targetTableName = "relative_memo_to_open_source_projects_7";
// }
// else if (osp_id >= 7500 && osp_id < 10000) {
// targetTableName = "relative_memo_to_open_source_projects_8";
// }
// else if (osp_id >= 10000 && osp_id < 310000) {
// int a = 7 + osp_id/5000;
// targetTableName = "relative_memo_to_open_source_projects_" + a;
// }
// else if (osp_id >= 310000) {
// targetTableName = "relative_memo_to_open_source_projects_70";
// }
return targetTableName;
// return "relative_memo_to_open_source_projects";
}
}

View File

@ -9,22 +9,6 @@
</layout>
</appender>
<appender name="file" class="org.apache.log4j.DailyRollingFileAppender">
<param name="File" value="./log/error.log" />
<param name="threshold" value="ERROR" />
<layout class="org.apache.log4j.PatternLayout">
<param name="ConversionPattern" value="%d{yy-MM-dd HH:mm:ss,SSS} %-5p %c(%F:%L) ## %m%n" />
</layout>
</appender>
<appender name="file_log" class="org.apache.log4j.DailyRollingFileAppender">
<param name="File" value="./log/webmagic.log" />
<layout class="org.apache.log4j.PatternLayout">
<param name="ConversionPattern" value="%d{yy-MM-dd HH:mm:ss,SSS} %-5p %c(%F:%L) ## %m%n" />
</layout>
</appender>
<!-- Email is sent only when an ERROR-level event occurs -->
<appender name="MAIL" class="org.apache.log4j.net.SMTPAppender">
<param name="threshold" value="debug" />
@ -35,7 +19,7 @@
<param name="From" value="ossean_debug@163.com" />
<param name="SMTPHost" value="smtp.163.com" />
<param name="Subject" value="ossean-crawler-debug-log4jMessage" />
<param name="To" value="gcm3651@126.com" />
<param name="To" value="cloud_zhan@163.com" />
<param name="SMTPUsername" value="ossean_debug" />
<param name="SMTPPassword" value="goodwell123" />
<layout class="org.apache.log4j.PatternLayout">