add the hotwords source code to the branch

This commit is contained in:
wangtao 2015-11-23 16:55:40 +08:00
parent 73e9d076d1
commit 77ce114bec
35 changed files with 1790 additions and 0 deletions

View File

@ -0,0 +1,37 @@
<?xml version="1.0" encoding="UTF-8"?>
<classpath>
<classpathentry kind="src" output="target/classes" path="src/main/java">
<attributes>
<attribute name="optional" value="true"/>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<classpathentry excluding="**" kind="src" output="target/classes" path="src/main/resources">
<attributes>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="src" output="target/test-classes" path="src/test/java">
<attributes>
<attribute name="optional" value="true"/>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<classpathentry excluding="**" kind="src" output="target/test-classes" path="src/test/resources">
<attributes>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="src" path="src/main/assembly"/>
<classpathentry kind="con" path="org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER">
<attributes>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.7">
<attributes>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="output" path="target/classes"/>
</classpath>

2
ossean_hotwords/.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
/target/
/log/*

23
ossean_hotwords/.project Normal file
View File

@ -0,0 +1,23 @@
<?xml version="1.0" encoding="UTF-8"?>
<projectDescription>
<name>hotwords</name>
<comment></comment>
<projects>
</projects>
<buildSpec>
<buildCommand>
<name>org.eclipse.jdt.core.javabuilder</name>
<arguments>
</arguments>
</buildCommand>
<buildCommand>
<name>org.eclipse.m2e.core.maven2Builder</name>
<arguments>
</arguments>
</buildCommand>
</buildSpec>
<natures>
<nature>org.eclipse.jdt.core.javanature</nature>
<nature>org.eclipse.m2e.core.maven2Nature</nature>
</natures>
</projectDescription>

View File

@ -0,0 +1,6 @@
eclipse.preferences.version=1
encoding//src/main/java=UTF-8
encoding//src/main/resources=UTF-8
encoding//src/test/java=UTF-8
encoding//src/test/resources=UTF-8
encoding/<project>=UTF-8

View File

@ -0,0 +1,5 @@
eclipse.preferences.version=1
org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.7
org.eclipse.jdt.core.compiler.compliance=1.7
org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
org.eclipse.jdt.core.compiler.source=1.7

View File

@ -0,0 +1,4 @@
activeProfiles=
eclipse.preferences.version=1
resolveWorkspaceProjects=true
version=1

View File

@ -0,0 +1,24 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Spring bean definitions for database access: a MyBatis SqlSessionFactoryBean, a MapperScannerConfigurer for the com.ossean.hotwords package, and the commons-dbcp BasicDataSource they use. -->
<beans xmlns="http://www.springframework.org/schema/beans"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://www.springframework.org/schema/beans
http://www.springframework.org/schema/beans/spring-beans-3.0.xsd">
<!-- MyBatis session factory wired to the dataSource bean defined below. -->
<bean id="sqlSessionFactory" class="org.mybatis.spring.SqlSessionFactoryBean">
<property name="dataSource" ref="dataSource" />
</bean>
<!-- Scans com.ossean.hotwords (the DAO interfaces live in its dao sub-package). -->
<bean class="org.mybatis.spring.mapper.MapperScannerConfigurer">
<property name="basePackage" value="com.ossean.hotwords" />
</bean>
<!-- Connection pool; close() is registered as the destroy method. -->
<!-- NOTE(review): the JDBC URL, user name and password are hard-coded here and committed to version control; consider moving them to an external, uncommitted properties file. -->
<bean id="dataSource" class="org.apache.commons.dbcp.BasicDataSource"
destroy-method="close">
<property name="driverClassName" value="com.mysql.jdbc.Driver" />
<property name="url"
value="jdbc:mysql://192.168.80.130:3306/ossean_production?characterEncoding=UTF-8" />
<property name="username" value="trustie" />
<property name="password" value="1234" />
</bean>
</beans>

View File

@ -0,0 +1,16 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Spring context configuration that enables annotation processing and component scanning for com.ossean.hotwords. -->
<!-- NOTE(review): the mvc namespace is declared on the root element but no mvc element is used in this file; confirm whether it can be dropped. -->
<beans xmlns="http://www.springframework.org/schema/beans"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:context="http://www.springframework.org/schema/context"
xmlns:mvc="http://www.springframework.org/schema/mvc"
xsi:schemaLocation="http://www.springframework.org/schema/mvc
http://www.springframework.org/schema/mvc/spring-mvc-3.0.xsd
http://www.springframework.org/schema/beans
http://www.springframework.org/schema/beans/spring-beans-3.0.xsd
http://www.springframework.org/schema/context
http://www.springframework.org/schema/context/spring-context-3.0.xsd">
<!-- NOTE(review): component-scan is documented to enable annotation-config implicitly, so this element is presumably redundant; confirm before removing. -->
<context:annotation-config/>
<!-- Registers annotated classes (for example the @Component Main class) found under com.ossean.hotwords. -->
<context:component-scan base-package="com.ossean.hotwords"/>
</beans>

View File

@ -0,0 +1,77 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE log4j:configuration SYSTEM "log4j.dtd">
<!-- log4j 1.x configuration. Defines console, file, mail and database appenders; the root logger below references only stdout, file and file_log. -->
<log4j:configuration xmlns:log4j="http://jakarta.apache.org/log4j/">
<!-- Console appender. -->
<!-- NOTE(review): the threshold param below is nested inside the layout element instead of the appender, unlike the "file" appender; it presumably has no effect there, so the console is not limited to ERROR. Confirm the intent before moving it, because the launcher script redirects stdout into log/info.log. -->
<appender name="stdout" class="org.apache.log4j.ConsoleAppender">
<layout class="org.apache.log4j.PatternLayout">
<param name="threshold" value="ERROR" />
<param name="ConversionPattern" value="%d{yy-MM-dd HH:mm:ss,SSS} %-5p %c(%F:%L) ## %m%n" />
</layout>
</appender>
<!-- Daily rolling file restricted to ERROR and above, written to ./log/error.log. -->
<appender name="file" class="org.apache.log4j.DailyRollingFileAppender">
<param name="File" value="./log/error.log" />
<param name="threshold" value="ERROR" />
<layout class="org.apache.log4j.PatternLayout">
<param name="ConversionPattern" value="%d{yy-MM-dd HH:mm:ss,SSS} %-5p %c(%F:%L) ## %m%n" />
</layout>
</appender>
<!-- Daily rolling file without its own threshold, written to ./log/webmagic.log. -->
<appender name="file_log" class="org.apache.log4j.DailyRollingFileAppender">
<param name="File" value="./log/webmagic.log" />
<layout class="org.apache.log4j.PatternLayout">
<param name="ConversionPattern" value="%d{yy-MM-dd HH:mm:ss,SSS} %-5p %c(%F:%L) ## %m%n" />
</layout>
</appender>
<!-- Mail is sent only when an ERROR occurs. NOTE(review): this appender is not referenced by the root logger (its appender-ref is commented out), and the SMTP account password is stored here in plain text. -->
<appender name="MAIL" class="org.apache.log4j.net.SMTPAppender">
<param name="threshold" value="debug" />
<!-- Log severity level: <param name="threshold" value="fatal"/> -->
<!-- Buffer size: the original note says an e-mail is sent once the log reaches 512K. NOTE(review): the configured value is 1, which does not match that note; confirm. -->
<param name="BufferSize" value="1" />
<!-- Unit: K (per the original note; TODO confirm what BufferSize actually counts) -->
<param name="From" value="ossean_debug@163.com" />
<param name="SMTPHost" value="smtp.163.com" />
<param name="Subject" value="ossean-crawler-debug-log4jMessage" />
<param name="To" value="gcm3651@126.com" />
<param name="SMTPUsername" value="ossean_debug" />
<param name="SMTPPassword" value="goodwell123" />
<layout class="org.apache.log4j.PatternLayout">
<param name="ConversionPattern" value="%-d{yyyy-MM-dd HH:mm:ss.SSS} [%p]-[%c] %m%n" />
</layout>
</appender>
<!-- Database logging. NOTE(review): not referenced by the root logger (its appender-ref is commented out); the database password is in plain text, and the log message is placed into the INSERT statement through the %m pattern, so quoting inside messages should be checked before enabling this appender. -->
<appender name="DATABASE" class="org.apache.log4j.jdbc.JDBCAppender">
<param name="URL" value="jdbc:mysql://127.0.0.1:3306/webmagic?characterEncoding=UTF-8"/>
<param name="driver" value="com.mysql.jdbc.Driver"/>
<param name="user" value="root"/>
<param name="password" value="1234"/>
<param name="sql" value="INSERT INTO log4j(stamp,thread,info_level,class,message,logger) VALUES ('%d{yyyy-MM-dd HH:mm:ss}','%t','%p','%c','%m','%l')"/>
<!-- <layout class="org.apache.log4j.PatternLayout">
<param name="ConversionPattern" value="INSERT INTO log4j(stamp,thread,info_level,class,message,logger) VALUES ('%d{yyyy-MM-dd HH:mm:ss}','%t','%.50p','%.50c','%.1000m','%.50l')" />
</layout>-->
<!-- Filters the events written by this appender. The original note mentions LevelMin ERROR and LevelMax FATAL; the values configured below are DEBUG through FATAL. -->
<filter class="org.apache.log4j.varia.LevelRangeFilter">
<param name="LevelMin" value="DEBUG"/>
<param name="LevelMax" value="FATAL"/>
</filter>
</appender>
<!-- Loggers under org.apache: level warn, console only, not passed on to the root appenders (additivity is false). -->
<logger name="org.apache" additivity="false">
<level value="warn" />
<appender-ref ref="stdout" />
</logger>
<!-- Root logger: level info, written to the console and both file appenders. -->
<root>
<level value="info" />
<appender-ref ref="stdout" />
<appender-ref ref="file" />
<appender-ref ref="file_log" />
<!-- <appender-ref ref="MAIL" />-->
<!-- <appender-ref ref="DATABASE" /> -->
</root>
</log4j:configuration>

View File

@ -0,0 +1,20 @@
#!/bin/bash
# Starts com.ossean.hotwords.Main in the background, appending stdout and
# stderr to log/info.log.

# Delete resource files that were copied into target/classes; ./bin/resources
# comes first on the classpath below, presumably so that those copies are the
# ones picked up (TODO confirm). "-exec ... +" copes with unusual file names,
# unlike piping find into xargs.
find ./target/classes \( -name "*.properties" -o -name "*.xml" -o -name "*.dic" \) -exec rm -f {} +

#export CLASSPATH=$CURR_DIR/lib:$CURR_DIR:$JAVA_HOME/lib:$JAVA_HOME/jre/lib

# Build the classpath from scratch instead of prepending to a "tmp" variable
# that might be inherited from the environment.
# The trailing "." keeps the current directory on the classpath: the previous
# string concatenation always left an empty entry after ./target/classes,
# which presumably resolved to the current directory.
APP_CLASSPATH="./bin/resources"
APP_CLASSPATH="$APP_CLASSPATH:./target/hotwords-0.0.1-SNAPSHOT-jar-with-dependencies-without-resources/*"
APP_CLASSPATH="$APP_CLASSPATH:./target/classes:."

# Append a pre-existing CLASSPATH only when it is set and non-empty.
CLASSPATH="$APP_CLASSPATH${CLASSPATH:+:$CLASSPATH}"
echo "$CLASSPATH"

# Kept as an array so every option reaches the JVM as its own argument.
JVM_ARGS=(-Xmn98m -Xmx512m -Xms512m -XX:NewRatio=4 -XX:SurvivorRatio=4 -XX:MaxTenuringThreshold=2)
#echo JVM_ARGS=$JVM_ARGS
#ulimit -n 400000
#echo "" > nohup.out

# The redirect below fails, and the JVM never starts, when log/ is missing.
mkdir -p log

# The classpath is quoted so the shell never expands the "*" wildcard itself.
java "${JVM_ARGS[@]}" -classpath "$CLASSPATH" com.ossean.hotwords.Main >>log/info.log 2>&1 &

View File

@ -0,0 +1,16 @@
<?xml version="1.0" encoding="UTF-8"?>
<module org.jetbrains.idea.maven.project.MavenProjectsManager.isMavenModule="true" type="JAVA_MODULE" version="4">
<component name="NewModuleRootManager" LANGUAGE_LEVEL="JDK_1_5" inherit-compiler-output="false">
<output url="file://$MODULE_DIR$/target/classes" />
<output-test url="file://$MODULE_DIR$/target/test-classes" />
<content url="file://$MODULE_DIR$">
<sourceFolder url="file://$MODULE_DIR$/src/main/java" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/src/main/resources" type="java-resource" />
<sourceFolder url="file://$MODULE_DIR$/src/test/java" isTestSource="true" />
<sourceFolder url="file://$MODULE_DIR$/src/test/resources" type="java-test-resource" />
<excludeFolder url="file://$MODULE_DIR$/target" />
</content>
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>

110
ossean_hotwords/pom.xml Normal file
View File

@ -0,0 +1,110 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<!-- Maven build descriptor for the ossean:hotwords artifact. -->
<modelVersion>4.0.0</modelVersion>
<groupId>ossean</groupId>
<artifactId>hotwords</artifactId>
<version>0.0.1-SNAPSHOT</version>
<!-- NOTE(review): m2eclipse.wtp.contextRoot and spring-security-version are not referenced anywhere else in this POM; confirm whether they are still needed. -->
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<!-- m2eclipse wtp 0.12+ enabled to configure contextRoot, add by w.vela -->
<m2eclipse.wtp.contextRoot>/</m2eclipse.wtp.contextRoot>
<spring-version>3.1.1.RELEASE</spring-version>
<spring-security-version>3.1.0.RELEASE</spring-security-version>
</properties>
<build>
<sourceDirectory>${basedir}/src/main/java</sourceDirectory>
<plugins>
<!-- Compiles with source and target level 1.7. NOTE(review): no plugin version is pinned here, so the version is chosen by Maven; consider pinning it for reproducible builds. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<configuration>
<source>1.7</source>
<target>1.7</target>
</configuration>
</plugin>
<!-- Assembles the distribution described by src/main/assembly/assembly.xml. NOTE(review): no executions are declared, so presumably the assembly goal has to be invoked explicitly; confirm against the build instructions. -->
<plugin>
<artifactId>maven-assembly-plugin</artifactId>
<version>2.5.1</version>
<configuration>
<descriptors>
<descriptor>src/main/assembly/assembly.xml</descriptor>
</descriptors>
</configuration>
</plugin>
</plugins>
</build>
<dependencies>
<dependency>
<groupId>commons-collections</groupId>
<artifactId>commons-collections</artifactId>
<version>3.2.1</version>
</dependency>
<dependency>
<groupId>org.springframework</groupId>
<artifactId>spring-jdbc</artifactId>
<version>${spring-version}</version>
</dependency>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
<version>3.1</version>
</dependency>
<!-- NOTE(review): declared with the default (compile) scope; confirm that the servlet API is really needed by this program, which is started through a main method. -->
<dependency>
<groupId>javax.servlet</groupId>
<artifactId>servlet-api</artifactId>
<version>2.5</version>
</dependency>
<!-- JDBC driver and connection pool referenced by the Spring data-source configuration. -->
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
<version>5.1.18</version>
</dependency>
<dependency>
<groupId>commons-dbcp</groupId>
<artifactId>commons-dbcp</artifactId>
<version>1.3</version>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.7</version>
<scope>test</scope>
</dependency>
<!-- MyBatis and its Spring integration, used by the DAO interfaces. -->
<dependency>
<groupId>org.mybatis</groupId>
<artifactId>mybatis</artifactId>
<version>3.1.1</version>
</dependency>
<dependency>
<groupId>org.mybatis</groupId>
<artifactId>mybatis-spring</artifactId>
<version>1.1.1</version>
</dependency>
<dependency>
<groupId>org.springframework</groupId>
<artifactId>spring-test</artifactId>
<version>${spring-version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
<version>1.7.7</version>
</dependency>
<dependency>
<groupId>cglib</groupId>
<artifactId>cglib-nodep</artifactId>
<version>3.1</version>
</dependency>
</dependencies>
</project>

View File

@ -0,0 +1,25 @@
<assembly
xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.2"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.2 http://maven.apache.org/xsd/assembly-1.1.2.xsd">
<!-- Assembly descriptor that collects the dependency jars, unpacked = false, into a plain directory; the project's own artifact is excluded from both dependency sets. -->
<!-- TODO: a jarjar format would be better -->
<!-- The id becomes the suffix of the output directory name that the launcher script puts on the classpath. -->
<id>jar-with-dependencies-without-resources</id>
<formats>
<format>dir</format>
</formats>
<includeBaseDirectory>false</includeBaseDirectory>
<dependencySets>
<!-- Runtime-scoped dependencies, copied as jars into the assembly root. -->
<dependencySet>
<outputDirectory>/</outputDirectory>
<useProjectArtifact>false</useProjectArtifact>
<unpack>false</unpack>
<scope>runtime</scope>
</dependencySet>
<!-- System-scoped dependencies, copied the same way. -->
<dependencySet>
<outputDirectory>/</outputDirectory>
<useProjectArtifact>false</useProjectArtifact>
<unpack>false</unpack>
<scope>system</scope>
</dependencySet>
</dependencySets>
</assembly>

View File

@ -0,0 +1,257 @@
package com.ossean.hotwords;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import javax.annotation.Resource;
import org.apache.log4j.Logger;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.context.ApplicationContext;
import org.springframework.context.support.ClassPathXmlApplicationContext;
import org.springframework.stereotype.Component;
import com.ossean.hotwords.dao.OpenSourceProjectsDao;
import com.ossean.hotwords.dao.PointersDao;
import com.ossean.hotwords.dao.RelativeMemoToOpenSourceProjectsDao;
import com.ossean.hotwords.dao.RelativeMemosDao;
import com.ossean.hotwords.dao.TaggingsDao;
import com.ossean.hotwords.dao.TagsDao;
import com.ossean.hotwords.model.Hotwords;
import com.ossean.hotwords.model.OpenSourceProjects;
import com.ossean.hotwords.model.RelativeMemoToOpenSourceProjects;
import com.ossean.hotwords.model.RelativeMemos;
import com.ossean.hotwords.utils.InsertHotwords;
import com.ossean.hotwords.utils.MapSort;
import com.ossean.hotwords.utils.StringHandler;
import com.ossean.hotwords.utils.UpdateTagsAndTaggings;
@Component
public class Main {
Logger logger = Logger.getLogger(this.getClass());
@Resource
private PointersDao pointersDao;
@Resource
private OpenSourceProjectsDao ospDao;
@Resource
private RelativeMemoToOpenSourceProjectsDao memoToOspDao;
@Resource
private TaggingsDao taggingsDao;
@Resource
private RelativeMemosDao memoDao;
@Resource
private TagsDao tagsDao;
@Qualifier("updatetagsandtaggings")
@Autowired
private UpdateTagsAndTaggings updateClass;
@Qualifier("inserthotwords")
@Autowired
private InsertHotwords insertClass;
private static String sourceTableName = "open_source_projects";
private static String targetTableName = "hot_words";
private static int batchSize = 10;// 一次处理项目数量
/** Upper bound on the number of hot words recorded for one project. */
private static final int MAX_HOTWORDS = 20;
/** Projects holding fewer tags than this get the larger new-tag allowance. */
private static final int TAG_COUNT_THRESHOLD = 10;
/** New tags allowed for a project below TAG_COUNT_THRESHOLD. */
private static final int MAX_NEW_TAGS_BELOW_THRESHOLD = 5;
/** New tags allowed for a project at or above TAG_COUNT_THRESHOLD. */
private static final int MAX_NEW_TAGS_AT_THRESHOLD = 3;
/** Idle wait, in milliseconds, when no project is left to process. */
private static final long IDLE_SLEEP_MILLIS = 3600 * 1000L;

/**
 * Endless processing loop: reads the resume pointer, loads the next batch of
 * projects and derives hot words and new tags for each of them from the tags
 * of the memos matched to the project.
 *
 * When a batch is empty the loop sleeps for one hour and tries again. The
 * method returns only when the thread is interrupted during that sleep.
 */
public void start() {
    while (true) {
        int startId = readOrInitPointer();
        List<OpenSourceProjects> projects = ospDao.getProjectsByBatch(startId, batchSize);
        if (projects.isEmpty()) {
            // Nothing left to process for now.
            logger.info("no projects! Sleep 3600s");
            try {
                Thread.sleep(IDLE_SLEEP_MILLIS);
            } catch (InterruptedException e) {
                // Keep the interrupt visible to callers and stop instead of swallowing it.
                Thread.currentThread().interrupt();
                logger.warn("interrupted while waiting for new projects; stopping", e);
                return;
            }
            continue;
        }
        for (OpenSourceProjects project : projects) {
            processProject(project);
        }
    }
}

/**
 * Reads the resume pointer for the source/target table pair. If reading
 * fails the pointer row is assumed to be missing: it is inserted and the
 * initial id 1 is used.
 */
private int readOrInitPointer() {
    try {
        return pointersDao.readPointer(sourceTableName, targetTableName);
    } catch (Exception e) {
        // Treated as "no pointer record yet", as in the original code.
        pointersDao.insertPointer(sourceTableName, targetTableName);
        return 1;
    }
}

/**
 * Computes hot words and tag additions for one project and stores them
 * through updateClass (when tags were added) or insertClass (otherwise).
 */
private void processProject(OpenSourceProjects project) {
    logger.info("匹配项目:" + project.getName() + " id:" + project.getId());
    int prjId = project.getId();
    // Memos matched to this project, read from the table that holds this id range.
    List<RelativeMemoToOpenSourceProjects> mtps = memoToOspDao.getRelativeMemosByOspId(prjId, getTargetTable(prjId));

    // Per-project accumulators: tag name -> summed match weight / occurrence count.
    Map<String, Float> tagWeightMap = new LinkedHashMap<String, Float>();
    Map<String, Integer> tagCountMap = new HashMap<String, Integer>();
    collectTagStatistics(mtps, tagWeightMap, tagCountMap);

    // Order the tags by weight, largest first (per the original comment on MapSort.sort).
    tagWeightMap = MapSort.sort(tagWeightMap);
    List<Hotwords> hotwordsList = buildHotwords(project.getId(), tagWeightMap);

    List<String> projectTagList = StringHandler.changeTagsToTagList(project.getTags());
    // Tags that already exist on the project; their disagree_num is adjusted later.
    List<String> updateDisagreeNumTagNameList = new ArrayList<String>();

    boolean belowThreshold = projectTagList.size() < TAG_COUNT_THRESHOLD;
    int maxNewTags = belowThreshold ? MAX_NEW_TAGS_BELOW_THRESHOLD : MAX_NEW_TAGS_AT_THRESHOLD;
    int count = mergeNewTags(tagWeightMap, projectTagList, updateDisagreeNumTagNameList, maxNewTags);

    if (count > 0) {
        // Tags were added. update() also receives hotwordsList, so it presumably
        // stores the hot words as well; verify against UpdateTagsAndTaggings.
        String tagsNew = StringHandler.changeTagListToTags(projectTagList);
        logger.info("正在进行更新项目标签信息和taggings表信息操作请勿中断程序");
        updateClass.update(project, projectTagList, tagsNew, count, sourceTableName, targetTableName, hotwordsList, updateDisagreeNumTagNameList, tagCountMap);
    } else {
        // No new tag: store the hot words only. The original code repeated this
        // insert unconditionally for projects with 10 or more tags; it now runs once.
        insertClass.insert(hotwordsList, sourceTableName, targetTableName, project.getId() + 1, updateDisagreeNumTagNameList, tagCountMap, project);
        String suffix = belowThreshold
                ? "没有标签更新操作只插入了热词和更新disagree_num标签"
                : "只插入了热词和更新disagree_num标签";
        logger.info("当前项目" + project.getName() + suffix);
    }
}

/**
 * Adds, for every tag of every matched memo, the match weight to tagWeightMap
 * and one occurrence to tagCountMap. Memos that cannot be loaded or whose
 * tags cannot be parsed are skipped.
 */
private void collectTagStatistics(List<RelativeMemoToOpenSourceProjects> matches,
        Map<String, Float> tagWeightMap, Map<String, Integer> tagCountMap) {
    for (RelativeMemoToOpenSourceProjects item : matches) {
        RelativeMemos memo = memoDao.getById(item.getRelative_memo_id());
        if (memo == null) {
            // Previously this case surfaced as a NullPointerException caught below.
            continue;
        }
        List<String> memoTagList;
        try {
            memoTagList = StringHandler.changeTagsToTagList(memo.getTags());
        } catch (Exception e) {
            // No usable tags on this memo.
            continue;
        }
        for (String tagName : memoTagList) {
            Float previousWeight = tagWeightMap.get(tagName);
            if (previousWeight != null) {
                tagWeightMap.put(tagName, previousWeight + item.getMatch_weight());
                tagCountMap.put(tagName, tagCountMap.get(tagName) + 1);
            } else {
                tagWeightMap.put(tagName, item.getMatch_weight());
                tagCountMap.put(tagName, 1);
            }
        }
    }
}

/** Builds Hotwords entries for the first MAX_HOTWORDS tags of the sorted map. */
private List<Hotwords> buildHotwords(int ospId, Map<String, Float> sortedTagWeights) {
    List<Hotwords> hotwordsList = new ArrayList<Hotwords>();
    for (Map.Entry<String, Float> entry : sortedTagWeights.entrySet()) {
        if (hotwordsList.size() >= MAX_HOTWORDS) {
            break;
        }
        Hotwords hotwords = new Hotwords();
        hotwords.setOsp_id(ospId);
        hotwords.setName(entry.getKey());
        hotwords.setWeight(entry.getValue());
        hotwordsList.add(hotwords);
    }
    return hotwordsList;
}

/**
 * Walks the sorted tags and appends those missing from projectTagList until
 * maxNewTags have been added; tags that are already present are recorded in
 * existingTagNames instead.
 *
 * @return the number of tags appended to projectTagList
 */
private int mergeNewTags(Map<String, Float> sortedTagWeights, List<String> projectTagList,
        List<String> existingTagNames, int maxNewTags) {
    int added = 0;
    Iterator<String> it = sortedTagWeights.keySet().iterator();
    while (it.hasNext() && added < maxNewTags) {
        String tagName = it.next();
        if (StringHandler.isTagExist(tagName, projectTagList)) {
            existingTagNames.add(tagName);
        } else {
            projectTagList.add(tagName);
            added++;
        }
    }
    return added;
}
/**
 * Maps a project id to the name of the match-result table that stores it.
 *
 * Ids below 10000 fall into eight fixed ranges (tables 1 to 8), ids from
 * 10000 up to 309999 use table {@code 7 + osp_id / 5000}, and ids from
 * 310000 upward use table 70.
 *
 * @param osp_id id of the open source project
 * @return the table name for that id
 */
public static String getTargetTable(int osp_id) {
    final String prefix = "relative_memo_to_open_source_projects_";
    if (osp_id >= 310000) {
        return prefix + 70;
    }
    if (osp_id >= 10000) {
        return prefix + (7 + osp_id / 5000);
    }
    // Exclusive upper bounds of tables 1..8; anything below the first bound uses table 1.
    final int[] upperBounds = {500, 1000, 1500, 2000, 3000, 5000, 7500, 10000};
    for (int i = 0; i < upperBounds.length; i++) {
        if (osp_id < upperBounds[i]) {
            return prefix + (i + 1);
        }
    }
    // Not reachable: every id below 10000 matches one of the bounds above.
    return "";
}
/**
 * Program entry point: loads every {@code applicationContext*.xml} found on
 * the classpath, obtains the {@code Main} bean and runs its processing loop.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {
    ClassPathXmlApplicationContext applicationContext = new ClassPathXmlApplicationContext("classpath:/applicationContext*.xml");
    try {
        Main mainClass = applicationContext.getBean(Main.class);
        mainClass.start();
    } finally {
        // Close the context when start() returns or throws, so beans with a
        // destroy method (the data source declares one) are shut down.
        applicationContext.close();
    }
}
}

View File

@ -0,0 +1,22 @@
package com.ossean.hotwords.dao;
import java.util.List;
import org.apache.ibatis.annotations.Insert;
import org.apache.ibatis.annotations.Param;
import org.apache.ibatis.annotations.Select;
import com.ossean.hotwords.model.Hotwords;
/**
 * MyBatis mapper for the {@code hot_words} table.
 */
public interface HotwordsDao {

    /**
     * Inserts one hot word row; created_at and updated_at are set to now().
     *
     * @param item the hot word to store (osp_id, name and weight are used)
     */
    @Insert("insert into hot_words (`osp_id`,`name`,`weight`,`created_at`,`updated_at`) values (#{item.osp_id},#{item.name},#{item.weight},now(),now())")
    void insertItem(@Param("item") Hotwords item);

    /**
     * Looks up the rows whose osp_id and name equal those of the given item.
     *
     * @param item carries the osp_id and name to search for
     * @return the matching rows
     */
    @Select("select * from hot_words where osp_id=#{item.osp_id} and name=#{item.name}")
    List<Hotwords> findItem(@Param("item") Hotwords item);
}

View File

@ -0,0 +1,22 @@
package com.ossean.hotwords.dao;
import java.util.List;
import org.apache.ibatis.annotations.Param;
import org.apache.ibatis.annotations.Select;
import org.apache.ibatis.annotations.Update;
import com.ossean.hotwords.model.OpenSourceProjects;
/**
 * MyBatis mapper for the {@code open_source_projects} table.
 */
public interface OpenSourceProjectsDao {

    /**
     * Reads the next batch of projects, starting at the given id.
     *
     * The rows are ordered by id so that consecutive batches walk the table
     * in ascending id order; without an ORDER BY the rows selected by LIMIT
     * are not guaranteed to be the lowest ids.
     *
     * @param start smallest project id to include
     * @param size  maximum number of projects to return
     * @return at most {@code size} projects with id greater than or equal to {@code start}
     */
    @Select("select * from open_source_projects where id>=#{start} order by id limit #{size}")
    public List<OpenSourceProjects> getProjectsByBatch(@Param("start") int start, @Param("size") int size);

    /**
     * Replaces the tags column of one project.
     *
     * @param id   id of the project to update
     * @param tags new value of the tags column
     */
    @Update("update open_source_projects set tags=#{tags} where id=#{id}")
    public void updateTagsOfProject(@Param("id") int id, @Param("tags") String tags);
}

View File

@ -0,0 +1,23 @@
package com.ossean.hotwords.dao;
import org.apache.ibatis.annotations.Insert;
import org.apache.ibatis.annotations.Param;
import org.apache.ibatis.annotations.Select;
import org.apache.ibatis.annotations.Update;
/**
 * MyBatis mapper for the {@code pointers} table, which stores one resume
 * pointer per (source table, target table) pair.
 */
public interface PointersDao {

    /**
     * Reads the pointer stored for the given table pair.
     * NOTE(review): the return type is a primitive int; Main.start treats any
     * exception from this call as "no pointer row yet". Confirm how the
     * mapper behaves when no row matches.
     *
     * @param source source table name
     * @param target target table name
     * @return the stored pointer value
     */
    @Select("select Pointer from pointers where SourceTableName=#{source} and TargetTableName=#{target}")
    int readPointer(@Param("source") String source, @Param("target") String target);

    /**
     * Creates the pointer row for the given table pair with the initial value 1.
     *
     * @param source source table name
     * @param target target table name
     */
    @Insert("insert into pointers (SourceTableName, TargetTableName, Pointer) values (#{source}, #{target}, 1)")
    void insertPointer(@Param("source") String source, @Param("target") String target);

    /**
     * Overwrites the pointer of the given table pair.
     *
     * @param source  source table name
     * @param target  target table name
     * @param pointer new pointer value
     */
    @Update("update pointers set Pointer=#{pointer} where SourceTableName=#{source} and TargetTableName=#{target}")
    void updatePointer(@Param("source") String source, @Param("target") String target, @Param("pointer") int pointer);
}

View File

@ -0,0 +1,16 @@
package com.ossean.hotwords.dao;
import java.util.List;
import org.apache.ibatis.annotations.Param;
import org.apache.ibatis.annotations.Select;
import com.ossean.hotwords.model.RelativeMemoToOpenSourceProjects;
/**
 * MyBatis mapper for the match-result tables that link memos to projects.
 */
public interface RelativeMemoToOpenSourceProjectsDao {

    /**
     * Reads the memo matches of one project whose match_weight is above 2.
     *
     * The table name is inserted into the statement as raw text through
     * {@code ${targetTableName}}, so it must never come from untrusted input.
     *
     * @param osp_id          id of the project
     * @param targetTableName name of the match-result table to query
     * @return the matching rows
     */
    @Select("select * from ${targetTableName} where osp_id = #{osp_id} and match_weight > 2")
    List<RelativeMemoToOpenSourceProjects> getRelativeMemosByOspId(@Param("osp_id") int osp_id, @Param("targetTableName") String targetTableName);
}

View File

@ -0,0 +1,14 @@
package com.ossean.hotwords.dao;
import org.apache.ibatis.annotations.Param;
import org.apache.ibatis.annotations.Select;
import com.ossean.hotwords.model.RelativeMemos;
/**
 * MyBatis mapper for the {@code relative_memos} table.
 */
public interface RelativeMemosDao {

    /**
     * Loads one memo by its id.
     *
     * @param id id of the memo
     * @return the memo selected by that id
     */
    @Select("select * from relative_memos where id=#{id}")
    RelativeMemos getById(@Param("id") int id);
}

View File

@ -0,0 +1,36 @@
package com.ossean.hotwords.dao;
import java.util.List;
import org.apache.ibatis.annotations.Insert;
import org.apache.ibatis.annotations.Param;
import org.apache.ibatis.annotations.Select;
import org.apache.ibatis.annotations.Update;
import com.ossean.hotwords.model.Taggings;
/**
 * MyBatis mapper for the {@code taggings} table.
 */
public interface TaggingsDao {

    /**
     * Reads the taggings rows attached to a memo (taggable_type 'RelativeMemo').
     *
     * @param memoId id of the memo
     * @return the taggings rows of that memo
     */
    @Select("select * from taggings where taggable_id=#{memoId} and taggable_type='RelativeMemo'")
    List<Taggings> getByMemoId(@Param("memoId") int memoId);

    /**
     * Inserts a taggings row; created_at is set to now().
     *
     * @param item the row to insert
     */
    @Insert("insert into taggings (tag_id,taggable_id,taggable_type,disagree_num,context,created_at,tag_source) values (#{item.tag_id},#{item.taggable_id},#{item.taggable_type},#{item.disagree_num},#{item.context},now(),#{item.tag_source})")
    void insertTaggings(@Param("item") Taggings item);

    /**
     * Looks up rows with the same tag_id, taggable_type and taggable_id as the item.
     *
     * @param item carries the three values to search for
     * @return the matching rows
     */
    @Select("select * from taggings where tag_id=#{item.tag_id} and taggable_type=#{item.taggable_type} and taggable_id=#{item.taggable_id}")
    List<Taggings> findTaggings(@Param("item") Taggings item);

    /**
     * Sets disagree_num on the row identified by taggable_id, taggable_type and tag_id.
     *
     * @param value         new disagree_num value
     * @param taggable_id   id of the tagged object
     * @param taggable_type type of the tagged object
     * @param tag_id        id of the tag
     */
    @Update("update taggings set disagree_num=#{value} where taggable_id=#{taggable_id} AND taggable_type=#{taggable_type} AND tag_id=#{tag_id} ")
    void updateDisagreeNum(@Param("value") int value, @Param("taggable_id") int taggable_id, @Param("taggable_type") String taggable_type, @Param("tag_id") int tag_id);

    /**
     * Reads disagree_num of the row identified by tag_id, taggable_id and taggable_type.
     *
     * @param tag_id        id of the tag
     * @param taggable_id   id of the tagged object
     * @param taggable_type type of the tagged object
     * @return the stored disagree_num
     */
    @Select("select disagree_num from taggings where tag_id=#{tag_id} and taggable_id=#{taggable_id} and taggable_type=#{taggable_type}")
    int getDisagreeNum(@Param("tag_id") int tag_id, @Param("taggable_id") int taggable_id, @Param("taggable_type") String taggable_type);
}

View File

@ -0,0 +1,20 @@
package com.ossean.hotwords.dao;
import java.util.List;
import org.apache.ibatis.annotations.Insert;
import org.apache.ibatis.annotations.Param;
import org.apache.ibatis.annotations.Select;
/**
 * MyBatis mapper for the {@code tags} table.
 */
public interface TagsDao {

    /**
     * Finds the ids of the tags with the given name.
     *
     * @param name tag name to look up
     * @return ids of all rows with that name
     */
    @Select("select id from tags where name=#{name}")
    List<Integer> getIdByName(@Param("name") String name);

    /**
     * Inserts a tag with the given name.
     *
     * @param name name of the new tag
     */
    @Insert("insert into tags (name) values (#{name})")
    void insertTag(@Param("name") String name);
}

View File

@ -0,0 +1,48 @@
package com.ossean.hotwords.model;
/**
 * Plain data holder for one hot word of a project: the project id, the word
 * and its weight, plus id and timestamp fields.
 */
public class Hotwords {

    private int id;
    private int osp_id;
    private String name;
    private float weight;
    private String created_at;
    private String updated_at;

    // Identity
    public int getId() { return id; }
    public void setId(int id) { this.id = id; }

    public int getOsp_id() { return osp_id; }
    public void setOsp_id(int osp_id) { this.osp_id = osp_id; }

    // Word and weight
    public String getName() { return name; }
    public void setName(String name) { this.name = name; }

    public float getWeight() { return weight; }
    public void setWeight(float weight) { this.weight = weight; }

    // Timestamps, kept as strings
    public String getCreated_at() { return created_at; }
    public void setCreated_at(String created_at) { this.created_at = created_at; }

    public String getUpdated_at() { return updated_at; }
    public void setUpdated_at(String updated_at) { this.updated_at = updated_at; }
}

View File

@ -0,0 +1,141 @@
package com.ossean.hotwords.model;
/**
 * Plain data holder for an open source project: descriptive fields, counters,
 * timestamps kept as strings, and the project's tags string.
 */
public class OpenSourceProjects {

    private int id;
    private String name;
    private String description;
    private int followers_num;
    private String url;
    private String language;
    private int download_num;
    private int view_num_crawled;
    private String category;
    private String crawled_time;
    private String source;
    private int view_num_local;
    private String created_at;
    private String updated_at;
    private int ossean_score;
    private int relative_memos_num;
    private String created_time;
    private String updated_time;
    private String tags;

    // Identity and description
    public int getId() { return id; }
    public void setId(int id) { this.id = id; }

    public String getName() { return name; }
    public void setName(String name) { this.name = name; }

    public String getDescription() { return description; }
    public void setDescription(String description) { this.description = description; }

    public int getFollowers_num() { return followers_num; }
    public void setFollowers_num(int followers_num) { this.followers_num = followers_num; }

    public String getUrl() { return url; }
    public void setUrl(String url) { this.url = url; }

    public String getLanguage() { return language; }
    public void setLanguage(String language) { this.language = language; }

    // Counters and classification
    public int getDownload_num() { return download_num; }
    public void setDownload_num(int download_num) { this.download_num = download_num; }

    public int getView_num_crawled() { return view_num_crawled; }
    public void setView_num_crawled(int view_num_crawled) { this.view_num_crawled = view_num_crawled; }

    public String getCategory() { return category; }
    public void setCategory(String category) { this.category = category; }

    public String getCrawled_time() { return crawled_time; }
    public void setCrawled_time(String crawled_time) { this.crawled_time = crawled_time; }

    public String getSource() { return source; }
    public void setSource(String source) { this.source = source; }

    public int getView_num_local() { return view_num_local; }
    public void setView_num_local(int view_num_local) { this.view_num_local = view_num_local; }

    // Timestamps and scores
    public String getCreated_at() { return created_at; }
    public void setCreated_at(String created_at) { this.created_at = created_at; }

    public String getUpdated_at() { return updated_at; }
    public void setUpdated_at(String updated_at) { this.updated_at = updated_at; }

    public int getOssean_score() { return ossean_score; }
    public void setOssean_score(int ossean_score) { this.ossean_score = ossean_score; }

    public int getRelative_memos_num() { return relative_memos_num; }
    public void setRelative_memos_num(int relative_memos_num) { this.relative_memos_num = relative_memos_num; }

    public String getCreated_time() { return created_time; }
    public void setCreated_time(String created_time) { this.created_time = created_time; }

    public String getUpdated_time() { return updated_time; }
    public void setUpdated_time(String updated_time) { this.updated_time = updated_time; }

    // Tags string; Main converts it to and from a list through StringHandler
    public String getTags() { return tags; }
    public void setTags(String tags) { this.tags = tags; }
}

View File

@ -0,0 +1,52 @@
package com.ossean.hotwords.model;
import java.util.List;
/**
 * Plain data holder linking one relative memo to one open source project.
 *
 * <p>The class only stores values; every property is exposed through a
 * getter/setter pair and no validation is performed.
 * NOTE(review): the snake_case property names presumably mirror database
 * column names used by a mapper; confirm against the mapper definition.
 */
public class RelativeMemoToOpenSourceProjects {

    /** Identifier of this link record. */
    private int id;
    /** Identifier of the open source project side of the link. */
    private int osp_id;
    /** Identifier of the relative memo side of the link. */
    private int relative_memo_id;
    /** Weight recorded for the match between memo and project. */
    private float match_weight;
    /** Creation time, kept as an unparsed string. */
    private String created_time;
    /** Tag ids attached to this link. */
    private List<Integer> tagIds;

    /** @return the {@code id} value */
    public int getId() {
        return this.id;
    }

    /** @param id new {@code id} value */
    public void setId(int id) {
        this.id = id;
    }

    /** @return the {@code osp_id} value */
    public int getOsp_id() {
        return this.osp_id;
    }

    /** @param osp_id new {@code osp_id} value */
    public void setOsp_id(int osp_id) {
        this.osp_id = osp_id;
    }

    /** @return the {@code relative_memo_id} value */
    public int getRelative_memo_id() {
        return this.relative_memo_id;
    }

    /** @param relative_memo_id new {@code relative_memo_id} value */
    public void setRelative_memo_id(int relative_memo_id) {
        this.relative_memo_id = relative_memo_id;
    }

    /** @return the {@code match_weight} value */
    public float getMatch_weight() {
        return this.match_weight;
    }

    /** @param match_weight new {@code match_weight} value */
    public void setMatch_weight(float match_weight) {
        this.match_weight = match_weight;
    }

    /** @return the {@code created_time} value */
    public String getCreated_time() {
        return this.created_time;
    }

    /** @param created_time new {@code created_time} value */
    public void setCreated_time(String created_time) {
        this.created_time = created_time;
    }

    /** @return the stored list itself (not a copy) */
    public List<Integer> getTagIds() {
        return this.tagIds;
    }

    /** @param tagIds list to store; the reference is kept as-is */
    public void setTagIds(List<Integer> tagIds) {
        this.tagIds = tagIds;
    }
}

View File

@ -0,0 +1,209 @@
package com.ossean.hotwords.model;
/**
 * Plain data holder for one relative memo (a post associated with a project).
 *
 * <p>All properties are simple values with a getter/setter pair; timestamps
 * are kept as unparsed strings and no validation is performed.
 * NOTE(review): the snake_case property names presumably mirror database
 * column names used by a mapper; confirm against the mapper definition.
 */
public class RelativeMemos {

    // Identity and ownership
    private int id;
    private int osp_id;

    // Main text content
    private String subject;
    private String content;
    private String author;

    // Counters and flags
    private int replies_num;
    private int lock;
    private int sticky;

    // Timestamps, stored as strings
    private String created_time;
    private String updated_time;

    // Origin and statistics
    private String url;
    private int view_num_crawled;
    private int vote_up_num;
    private int collection_num;
    private String abstractText;
    private String memo_type;
    private String source;
    private String category;
    private int view_num_trustie;

    // Relations to other records
    private int author_id;
    private int parent_id;
    private int last_reply_id;
    private int is_quote;

    // Author details and crawl metadata
    private String username;
    private String userhomeurl;
    private String crawled_time;
    private String author_url;
    private String url_md5;

    // Tag string attached to the memo
    private String tags;

    /** @return the {@code id} value */
    public int getId() {
        return this.id;
    }

    /** @param id new {@code id} value */
    public void setId(int id) {
        this.id = id;
    }

    /** @return the {@code osp_id} value */
    public int getOsp_id() {
        return this.osp_id;
    }

    /** @param osp_id new {@code osp_id} value */
    public void setOsp_id(int osp_id) {
        this.osp_id = osp_id;
    }

    /** @return the {@code subject} value */
    public String getSubject() {
        return this.subject;
    }

    /** @param subject new {@code subject} value */
    public void setSubject(String subject) {
        this.subject = subject;
    }

    /** @return the {@code content} value */
    public String getContent() {
        return this.content;
    }

    /** @param content new {@code content} value */
    public void setContent(String content) {
        this.content = content;
    }

    /** @return the {@code author} value */
    public String getAuthor() {
        return this.author;
    }

    /** @param author new {@code author} value */
    public void setAuthor(String author) {
        this.author = author;
    }

    /** @return the {@code replies_num} value */
    public int getReplies_num() {
        return this.replies_num;
    }

    /** @param replies_num new {@code replies_num} value */
    public void setReplies_num(int replies_num) {
        this.replies_num = replies_num;
    }

    /** @return the {@code lock} value */
    public int getLock() {
        return this.lock;
    }

    /** @param lock new {@code lock} value */
    public void setLock(int lock) {
        this.lock = lock;
    }

    /** @return the {@code sticky} value */
    public int getSticky() {
        return this.sticky;
    }

    /** @param sticky new {@code sticky} value */
    public void setSticky(int sticky) {
        this.sticky = sticky;
    }

    /** @return the {@code created_time} value */
    public String getCreated_time() {
        return this.created_time;
    }

    /** @param created_time new {@code created_time} value */
    public void setCreated_time(String created_time) {
        this.created_time = created_time;
    }

    /** @return the {@code updated_time} value */
    public String getUpdated_time() {
        return this.updated_time;
    }

    /** @param updated_time new {@code updated_time} value */
    public void setUpdated_time(String updated_time) {
        this.updated_time = updated_time;
    }

    /** @return the {@code url} value */
    public String getUrl() {
        return this.url;
    }

    /** @param url new {@code url} value */
    public void setUrl(String url) {
        this.url = url;
    }

    /** @return the {@code view_num_crawled} value */
    public int getView_num_crawled() {
        return this.view_num_crawled;
    }

    /** @param view_num_crawled new {@code view_num_crawled} value */
    public void setView_num_crawled(int view_num_crawled) {
        this.view_num_crawled = view_num_crawled;
    }

    /** @return the {@code vote_up_num} value */
    public int getVote_up_num() {
        return this.vote_up_num;
    }

    /** @param vote_up_num new {@code vote_up_num} value */
    public void setVote_up_num(int vote_up_num) {
        this.vote_up_num = vote_up_num;
    }

    /** @return the {@code collection_num} value */
    public int getCollection_num() {
        return this.collection_num;
    }

    /** @param collection_num new {@code collection_num} value */
    public void setCollection_num(int collection_num) {
        this.collection_num = collection_num;
    }

    /** @return the {@code abstractText} value */
    public String getAbstractText() {
        return this.abstractText;
    }

    /** @param abstractText new {@code abstractText} value */
    public void setAbstractText(String abstractText) {
        this.abstractText = abstractText;
    }

    /** @return the {@code memo_type} value */
    public String getMemo_type() {
        return this.memo_type;
    }

    /** @param memo_type new {@code memo_type} value */
    public void setMemo_type(String memo_type) {
        this.memo_type = memo_type;
    }

    /** @return the {@code source} value */
    public String getSource() {
        return this.source;
    }

    /** @param source new {@code source} value */
    public void setSource(String source) {
        this.source = source;
    }

    /** @return the {@code category} value */
    public String getCategory() {
        return this.category;
    }

    /** @param category new {@code category} value */
    public void setCategory(String category) {
        this.category = category;
    }

    /** @return the {@code view_num_trustie} value */
    public int getView_num_trustie() {
        return this.view_num_trustie;
    }

    /** @param view_num_trustie new {@code view_num_trustie} value */
    public void setView_num_trustie(int view_num_trustie) {
        this.view_num_trustie = view_num_trustie;
    }

    /** @return the {@code author_id} value */
    public int getAuthor_id() {
        return this.author_id;
    }

    /** @param author_id new {@code author_id} value */
    public void setAuthor_id(int author_id) {
        this.author_id = author_id;
    }

    /** @return the {@code parent_id} value */
    public int getParent_id() {
        return this.parent_id;
    }

    /** @param parent_id new {@code parent_id} value */
    public void setParent_id(int parent_id) {
        this.parent_id = parent_id;
    }

    /** @return the {@code last_reply_id} value */
    public int getLast_reply_id() {
        return this.last_reply_id;
    }

    /** @param last_reply_id new {@code last_reply_id} value */
    public void setLast_reply_id(int last_reply_id) {
        this.last_reply_id = last_reply_id;
    }

    /** @return the {@code is_quote} value */
    public int getIs_quote() {
        return this.is_quote;
    }

    /** @param is_quote new {@code is_quote} value */
    public void setIs_quote(int is_quote) {
        this.is_quote = is_quote;
    }

    /** @return the {@code username} value */
    public String getUsername() {
        return this.username;
    }

    /** @param username new {@code username} value */
    public void setUsername(String username) {
        this.username = username;
    }

    /** @return the {@code userhomeurl} value */
    public String getUserhomeurl() {
        return this.userhomeurl;
    }

    /** @param userhomeurl new {@code userhomeurl} value */
    public void setUserhomeurl(String userhomeurl) {
        this.userhomeurl = userhomeurl;
    }

    /** @return the {@code crawled_time} value */
    public String getCrawled_time() {
        return this.crawled_time;
    }

    /** @param crawled_time new {@code crawled_time} value */
    public void setCrawled_time(String crawled_time) {
        this.crawled_time = crawled_time;
    }

    /** @return the {@code author_url} value */
    public String getAuthor_url() {
        return this.author_url;
    }

    /** @param author_url new {@code author_url} value */
    public void setAuthor_url(String author_url) {
        this.author_url = author_url;
    }

    /** @return the {@code url_md5} value */
    public String getUrl_md5() {
        return this.url_md5;
    }

    /** @param url_md5 new {@code url_md5} value */
    public void setUrl_md5(String url_md5) {
        this.url_md5 = url_md5;
    }

    /** @return the {@code tags} value */
    public String getTags() {
        return this.tags;
    }

    /** @param tags new {@code tags} value */
    public void setTags(String tags) {
        this.tags = tags;
    }
}

View File

@ -0,0 +1,75 @@
package com.ossean.hotwords.model;
/**
 * Plain data holder describing one tagging: the association between a tag
 * ({@code tag_id}) and a tagged object ({@code taggable_id} /
 * {@code taggable_type}).
 *
 * <p>The class only stores values; no validation is performed.
 * NOTE(review): the snake_case property names presumably mirror database
 * column names used by a mapper; confirm against the mapper definition.
 */
public class Taggings {

    /** Identifier of this tagging record. */
    private int id;
    /** Identifier of the tag. */
    private int tag_id;
    /** Identifier of the tagged object. */
    private int taggable_id;
    /** Type name of the tagged object. */
    private String taggable_type;
    /** Identifier of whoever applied the tag. */
    private int tagger_id;
    /** Type name of whoever applied the tag. */
    private String tagger_type;
    /** Tagging context label. */
    private String context;
    /** Creation time, kept as an unparsed string. */
    private String created_at;
    /** Disagreement counter for this tagging. */
    private int disagree_num;
    /** Label describing where the tag came from. */
    private String tag_source;

    /** @return the {@code id} value */
    public int getId() {
        return this.id;
    }

    /** @param id new {@code id} value */
    public void setId(int id) {
        this.id = id;
    }

    /** @return the {@code tag_id} value */
    public int getTag_id() {
        return this.tag_id;
    }

    /** @param tag_id new {@code tag_id} value */
    public void setTag_id(int tag_id) {
        this.tag_id = tag_id;
    }

    /** @return the {@code taggable_id} value */
    public int getTaggable_id() {
        return this.taggable_id;
    }

    /** @param taggable_id new {@code taggable_id} value */
    public void setTaggable_id(int taggable_id) {
        this.taggable_id = taggable_id;
    }

    /** @return the {@code taggable_type} value */
    public String getTaggable_type() {
        return this.taggable_type;
    }

    /** @param taggable_type new {@code taggable_type} value */
    public void setTaggable_type(String taggable_type) {
        this.taggable_type = taggable_type;
    }

    /** @return the {@code tagger_id} value */
    public int getTagger_id() {
        return this.tagger_id;
    }

    /** @param tagger_id new {@code tagger_id} value */
    public void setTagger_id(int tagger_id) {
        this.tagger_id = tagger_id;
    }

    /** @return the {@code tagger_type} value */
    public String getTagger_type() {
        return this.tagger_type;
    }

    /** @param tagger_type new {@code tagger_type} value */
    public void setTagger_type(String tagger_type) {
        this.tagger_type = tagger_type;
    }

    /** @return the {@code context} value */
    public String getContext() {
        return this.context;
    }

    /** @param context new {@code context} value */
    public void setContext(String context) {
        this.context = context;
    }

    /** @return the {@code created_at} value */
    public String getCreated_at() {
        return this.created_at;
    }

    /** @param created_at new {@code created_at} value */
    public void setCreated_at(String created_at) {
        this.created_at = created_at;
    }

    /** @return the {@code disagree_num} value */
    public int getDisagree_num() {
        return this.disagree_num;
    }

    /** @param disagree_num new {@code disagree_num} value */
    public void setDisagree_num(int disagree_num) {
        this.disagree_num = disagree_num;
    }

    /** @return the {@code tag_source} value */
    public String getTag_source() {
        return this.tag_source;
    }

    /** @param tag_source new {@code tag_source} value */
    public void setTag_source(String tag_source) {
        this.tag_source = tag_source;
    }
}

View File

@ -0,0 +1,68 @@
package com.ossean.hotwords.utils;
import java.util.List;
import java.util.Map;
import javax.annotation.Resource;
import org.springframework.stereotype.Component;
import org.springframework.transaction.annotation.Propagation;
import org.springframework.transaction.annotation.Transactional;
import com.ossean.hotwords.dao.HotwordsDao;
import com.ossean.hotwords.dao.PointersDao;
import com.ossean.hotwords.dao.TaggingsDao;
import com.ossean.hotwords.dao.TagsDao;
import com.ossean.hotwords.model.Hotwords;
import com.ossean.hotwords.model.OpenSourceProjects;
/**
 * Writes the hot words computed for one project, bumps the
 * {@code disagree_num} of tags the project already carries, and finally
 * advances the processing pointer, all inside one new transaction.
 */
@Component("inserthotwords")
public class InsertHotwords {

    /** Maximum number of new hot-word rows written by a single {@link #insert} call. */
    private static final int MAX_HOTWORDS_PER_INSERT = 20;

    /** {@code taggable_type} value used when addressing a project's taggings. */
    private static final String PROJECT_TAGGABLE_TYPE = "OpenSourceProject";

    @Resource
    private HotwordsDao hotwordsDao;
    @Resource
    private PointersDao pointersDao;
    @Resource
    private TagsDao tagsDao;
    @Resource
    private TaggingsDao taggingsDao;

    /**
     * Persists hot words and tag statistics for {@code project}.
     *
     * <p>NOTE(review): this method and its caller both use
     * {@code REQUIRES_NEW}, so rows the caller wrote but has not yet committed
     * may be invisible here. Confirm that the two transactions cannot wait on
     * each other.
     *
     * @param hotwordsList hot words to store, in priority order; entries that
     *        already exist are skipped and at most
     *        {@value #MAX_HOTWORDS_PER_INSERT} new ones are written
     * @param sourceTableName source table name recorded with the pointer
     * @param targetTableName target table name recorded with the pointer
     * @param pointer new pointer value to store
     * @param updateDisagreeNumTagNameList names of tags whose
     *        {@code disagree_num} must be increased
     * @param tagCountMap number of occurrences per tag name; a tag without an
     *        entry is left unchanged
     * @param project project the tags belong to
     * @throws IllegalStateException if a tag is still missing right after it
     *         was inserted
     */
    @Transactional(propagation=Propagation.REQUIRES_NEW)
    public void insert(List<Hotwords> hotwordsList, String sourceTableName, String targetTableName, int pointer, List<String> updateDisagreeNumTagNameList, Map<String, Integer> tagCountMap, OpenSourceProjects project){
        insertNewHotwords(hotwordsList);

        // Increase disagree_num for tags the project already carries.
        int taggable_id = project.getId();
        for (String tagName : updateDisagreeNumTagNameList) {
            int tag_id = resolveTagId(tagName);
            Integer occurrences = tagCountMap.get(tagName);
            if (occurrences == null) {
                // No count was supplied for this tag; unboxing null would throw,
                // and there is nothing to add anyway.
                continue;
            }
            int value = taggingsDao.getDisagreeNum(tag_id, taggable_id, PROJECT_TAGGABLE_TYPE);
            // Add the number of times the tag appeared in the posts.
            value = value + occurrences;
            taggingsDao.updateDisagreeNum(value, taggable_id, PROJECT_TAGGABLE_TYPE, tag_id);
        }

        pointersDao.updatePointer(sourceTableName, targetTableName, pointer);
    }

    /** Inserts hot words that are not stored yet, stopping once the per-call limit is reached. */
    private void insertNewHotwords(List<Hotwords> hotwordsList) {
        int inserted = 0; // number of hot words written so far
        for (Hotwords hotwords : hotwordsList) {
            if (inserted >= MAX_HOTWORDS_PER_INSERT) {
                break;
            }
            // Skip the entry when a matching record already exists.
            List<Hotwords> existing = hotwordsDao.findItem(hotwords);
            if (existing.size() != 0) {
                continue;
            }
            hotwordsDao.insertItem(hotwords);
            inserted++;
        }
    }

    /**
     * Returns the id of the tag called {@code tagName}, creating the tag first
     * when it does not exist. The lookup is repeated exactly once after the
     * insert, so a tag that stays invisible fails fast instead of retrying
     * forever.
     */
    private int resolveTagId(String tagName) {
        List<Integer> ids = tagsDao.getIdByName(tagName);
        if (ids.size() == 0) {
            // The tag is unknown: add it to the tags table, then look it up again.
            tagsDao.insertTag(tagName);
            ids = tagsDao.getIdByName(tagName);
            if (ids.size() == 0) {
                throw new IllegalStateException("tag '" + tagName + "' could not be found after inserting it");
            }
        }
        return ids.get(0);
    }
}

View File

@ -0,0 +1,33 @@
package com.ossean.hotwords.utils;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Map.Entry;
public class MapSort {
public static Map<String, Float> sort(Map<String, Float> oldMap){
ArrayList<Map.Entry<String, Float>> list = new ArrayList<Map.Entry<String, Float>>(oldMap.entrySet());
Collections.sort(list, new Comparator<Map.Entry<String, Float>>() {
public int compare(Entry<String, Float> arg0,
Entry<String, Float> arg1) {
if(arg0.getValue() - arg1.getValue() > 0)
return -1;
else if(arg0.getValue() - arg1.getValue() < 0)
return 1;
else
return 0;
}
});
Map<String,Float> newMap = new LinkedHashMap<String,Float>();
for (int i = 0; i < list.size(); i++) {
newMap.put(list.get(i).getKey(), list.get(i).getValue());
}
return newMap;
}
}

View File

@ -0,0 +1,58 @@
package com.ossean.hotwords.utils;
import java.util.ArrayList;
import java.util.List;
public class StringHandler {
//将tags字符串转换成List<String>
public static List<String> changeTagsToTagList(String tags){
List<String> result = new ArrayList<String>();
if(tags != null && !"".equals(tags)){
String[] strings = tags.split(",");
for(String str:strings){
int index1 = str.indexOf("<");
int index2 = str.indexOf(">");
if(index2 > index1 && index1 >= 0){
//读取当前的标签
String tag = str.substring(index1 + 1, index2);
if(!"".equals(tag) )
result.add(tag);
}
}
}
return result;
}
//将tagList转换成字符串
public static String changeTagListToTags(List<String> tagList){
String result = "";
for(String tag:tagList){
result += "<" + tag + ">,";
}
if(!"".equals(result)){
//表示需要转换的标签数量不为0
result = result.substring(0, result.length() - 1);
}else{
//表示没有需要转换的标签
result = null;
}
return result;
}
//判断标签在List中是否存在
public static boolean isTagExist(String tag, List<String> tags){
for(int i = 0; i < tags.size(); i++){
String tagName = tags.get(i);
if(tagName.equals(tag)){
//表示存在相同的标签
return true;
}
}
return false;//遍历完还没有返回true 就表示没有相同名称的标签
}
}

View File

@ -0,0 +1,81 @@
package com.ossean.hotwords.utils;
import java.util.List;
import java.util.Map;
import javax.annotation.Resource;
import org.apache.log4j.Logger;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.stereotype.Component;
import org.springframework.transaction.annotation.Propagation;
import org.springframework.transaction.annotation.Transactional;
import com.ossean.hotwords.dao.OpenSourceProjectsDao;
import com.ossean.hotwords.dao.PointersDao;
import com.ossean.hotwords.dao.TaggingsDao;
import com.ossean.hotwords.dao.TagsDao;
import com.ossean.hotwords.model.Hotwords;
import com.ossean.hotwords.model.OpenSourceProjects;
import com.ossean.hotwords.model.Taggings;
/**
 * Applies the result of hot-word extraction to one project: stores the new
 * tag string, creates taggings for the newly added tags, and then delegates
 * hot-word insertion and pointer update to {@link InsertHotwords}.
 */
@Component("updatetagsandtaggings")
public class UpdateTagsAndTaggings {
    Logger logger = Logger.getLogger(this.getClass());
    @Resource
    private TaggingsDao taggingsDao;
    @Resource
    private OpenSourceProjectsDao ospDao;
    @Resource
    private TagsDao tagsDao;
    @Resource
    private PointersDao pointersDao;
    @Qualifier("inserthotwords")
    @Autowired
    private InsertHotwords insertClass;

    /**
     * Updates the tags of {@code project} and records the matching taggings.
     *
     * <p>NOTE(review): this method and {@code InsertHotwords.insert} both use
     * {@code REQUIRES_NEW}; tags inserted here are not yet committed when the
     * nested call runs. Confirm that the nested transaction does not need to
     * see them.
     *
     * @param project project being updated
     * @param projectTagList all tag names of the project; the newly added ones
     *        are the last {@code count} elements
     * @param tagsNew new value of the project's tag string
     * @param count number of newly added tags at the end of
     *        {@code projectTagList}; values larger than the list size are
     *        treated as "all elements"
     * @param sourceTableName forwarded to {@code InsertHotwords.insert}
     * @param targetTableName forwarded to {@code InsertHotwords.insert}
     * @param hotwordsList forwarded to {@code InsertHotwords.insert}
     * @param updateDisagreeNumTagNameList forwarded to {@code InsertHotwords.insert}
     * @param tagCountMap forwarded to {@code InsertHotwords.insert}
     * @throws IllegalStateException if a tag is still missing right after it
     *         was inserted
     */
    @Transactional(propagation=Propagation.REQUIRES_NEW)
    public void update(OpenSourceProjects project, List<String> projectTagList, String tagsNew, int count, String sourceTableName, String targetTableName, List<Hotwords> hotwordsList, List<String> updateDisagreeNumTagNameList, Map<String, Integer> tagCountMap){
        // Store the new tag string on the project.
        ospDao.updateTagsOfProject(project.getId(), tagsNew);

        // Walk the newly added tags from the end of the list backwards.
        // The lower bound is clamped so that count > size cannot produce a
        // negative index.
        int total = projectTagList.size();
        int firstNewIndex = Math.max(0, total - count);
        for (int i = total - 1; i >= firstNewIndex; i--) {
            String tagName = projectTagList.get(i);
            int tagId = findOrCreateTagId(tagName);

            // Build the taggings record for this tag and project.
            Taggings taggings = new Taggings();
            taggings.setTag_id(tagId);
            taggings.setTaggable_id(project.getId());
            taggings.setDisagree_num(0); // disagree_num starts at 0
            taggings.setTaggable_type("OpenSourceProject");
            taggings.setContext("tags");
            taggings.setTag_source("FromRelativeMemo");

            List<Taggings> existing = taggingsDao.findTaggings(taggings);
            if (existing.size() == 0) {
                // No matching taggings record yet: insert the new one.
                taggingsDao.insertTaggings(taggings);
            }
        }

        // Insert the project's hot words. The insert call also updates the
        // pointer and the disagree_num of tags the project already had.
        insertClass.insert(hotwordsList, sourceTableName, targetTableName, project.getId() + 1, updateDisagreeNumTagNameList, tagCountMap, project);
        logger.info("当前项目" + project.getName() + "的更新操作完成");
    }

    /**
     * Returns the id of the tag called {@code tagName}, creating the tag first
     * when it does not exist. The lookup is repeated exactly once after the
     * insert, so a tag that stays invisible fails fast instead of being
     * retried forever.
     */
    private int findOrCreateTagId(String tagName) {
        List<Integer> tagIds = tagsDao.getIdByName(tagName);
        if (tagIds.size() == 0) {
            // A missing tag is repaired right here, so it is a warning rather
            // than a fatal error.
            logger.warn("tag: " + tagName + " doesn't exist! We will insert one");
            tagsDao.insertTag(tagName);
            tagIds = tagsDao.getIdByName(tagName);
            if (tagIds.size() == 0) {
                throw new IllegalStateException("tag '" + tagName + "' could not be found after inserting it");
            }
        }
        return tagIds.get(0);
    }
}

View File

@ -0,0 +1,39 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Spring context for persistence: MyBatis session factory, mapper scanning,
  the JDBC data source and annotation-driven transactions.
-->
<beans xmlns="http://www.springframework.org/schema/beans"
       xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
       xmlns:context="http://www.springframework.org/schema/context"
       xmlns:aop="http://www.springframework.org/schema/aop"
       xmlns:p="http://www.springframework.org/schema/p"
       xmlns:tx="http://www.springframework.org/schema/tx"
       xsi:schemaLocation="http://www.springframework.org/schema/beans
                           http://www.springframework.org/schema/beans/spring-beans-3.0.xsd
                           http://www.springframework.org/schema/context
                           http://www.springframework.org/schema/context/spring-context-3.0.xsd
                           http://www.springframework.org/schema/tx
                           http://www.springframework.org/schema/tx/spring-tx-3.0.xsd
                           http://www.springframework.org/schema/aop
                           http://www.springframework.org/schema/aop/spring-aop-3.0.xsd">

  <!-- MyBatis session factory backed by the data source declared below. -->
  <bean id="sqlSessionFactory" class="org.mybatis.spring.SqlSessionFactoryBean">
    <property name="dataSource" ref="dataSource" />
  </bean>

  <!-- Registers the mapper interfaces found under the base package. -->
  <bean class="org.mybatis.spring.mapper.MapperScannerConfigurer">
    <property name="basePackage" value="com.ossean.hotwords" />
  </bean>

  <!--
    Pooled JDBC data source.
    NOTE(review): host, database and the root account with an empty password
    are committed here. Consider supplying them from outside the repository.
  -->
  <bean id="dataSource" class="org.apache.commons.dbcp.BasicDataSource"
        destroy-method="close">
    <property name="driverClassName" value="com.mysql.jdbc.Driver" />
    <property name="url"
              value="jdbc:mysql://localhost:3306/oss_dev?characterEncoding=UTF-8" />
    <property name="username" value="root" />
    <property name="password" value="" />
  </bean>

  <!-- Enables @Transactional handling with the transaction manager below. -->
  <tx:annotation-driven transaction-manager="transactionManager"/>

  <bean id="transactionManager" class="org.springframework.jdbc.datasource.DataSourceTransactionManager">
    <property name="dataSource" ref="dataSource" />
  </bean>

</beans>

View File

@ -0,0 +1,16 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Spring context that turns on annotation processing and registers the
  annotated components found under com.ossean.hotwords.
-->
<beans xmlns="http://www.springframework.org/schema/beans"
       xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
       xmlns:context="http://www.springframework.org/schema/context"
       xmlns:mvc="http://www.springframework.org/schema/mvc"
       xsi:schemaLocation="http://www.springframework.org/schema/mvc
                           http://www.springframework.org/schema/mvc/spring-mvc-3.0.xsd
                           http://www.springframework.org/schema/beans
                           http://www.springframework.org/schema/beans/spring-beans-3.0.xsd
                           http://www.springframework.org/schema/context
                           http://www.springframework.org/schema/context/spring-context-3.0.xsd">

  <!-- Activates annotation processing (for example @Resource and @Autowired). -->
  <context:annotation-config/>

  <!-- Detects annotated classes such as @Component under the base package. -->
  <context:component-scan base-package="com.ossean.hotwords"/>

</beans>

View File

@ -0,0 +1,77 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE log4j:configuration SYSTEM "log4j.dtd">
<!--
  log4j 1.x logging configuration.
  Appenders referenced from <root>: stdout, file, file_log.
  Appenders declared but not referenced: MAIL, DATABASE.
-->
<log4j:configuration xmlns:log4j="http://jakarta.apache.org/log4j/">

  <!-- Console output, limited to ERROR and above. -->
  <appender name="stdout" class="org.apache.log4j.ConsoleAppender">
    <!-- threshold must be set on the appender: PatternLayout has no such
         property, so placing it inside <layout> has no effect. -->
    <param name="threshold" value="ERROR" />
    <layout class="org.apache.log4j.PatternLayout">
      <param name="ConversionPattern" value="%d{yy-MM-dd HH:mm:ss,SSS} %-5p %c(%F:%L) ## %m%n" />
    </layout>
  </appender>

  <!-- Daily rolling file that receives ERROR and above. -->
  <appender name="file" class="org.apache.log4j.DailyRollingFileAppender">
    <param name="File" value="./log/error.log" />
    <param name="threshold" value="ERROR" />
    <layout class="org.apache.log4j.PatternLayout">
      <param name="ConversionPattern" value="%d{yy-MM-dd HH:mm:ss,SSS} %-5p %c(%F:%L) ## %m%n" />
    </layout>
  </appender>

  <!-- Daily rolling file without a threshold of its own. -->
  <appender name="file_log" class="org.apache.log4j.DailyRollingFileAppender">
    <param name="File" value="./log/webmagic.log" />
    <layout class="org.apache.log4j.PatternLayout">
      <param name="ConversionPattern" value="%d{yy-MM-dd HH:mm:ss,SSS} %-5p %c(%F:%L) ## %m%n" />
    </layout>
  </appender>

  <!-- E-mail alerts. With SMTPAppender's default trigger a mail is sent only
       when an event of level ERROR or higher arrives. -->
  <appender name="MAIL" class="org.apache.log4j.net.SMTPAppender">
    <param name="threshold" value="debug" />
    <!-- Stricter alternative: <param name="threshold" value="fatal"/> -->
    <!-- BufferSize is the number of recent logging events kept for the mail.
         It is a count of events, not a size in KB. -->
    <param name="BufferSize" value="1" />
    <param name="From" value="ossean_debug@163.com" />
    <param name="SMTPHost" value="smtp.163.com" />
    <param name="Subject" value="ossean-crawler-debug-log4jMessage" />
    <param name="To" value="gcm3651@126.com" />
    <param name="SMTPUsername" value="ossean_debug" />
    <!-- The password is read from the system property ossean.smtp.password
         (for example -Dossean.smtp.password=...). Do not commit the real
         value. A password that was committed earlier should be rotated. -->
    <param name="SMTPPassword" value="${ossean.smtp.password}" />
    <layout class="org.apache.log4j.PatternLayout">
      <param name="ConversionPattern" value="%-d{yyyy-MM-dd HH:mm:ss.SSS} [%p]-[%c] %m%n" />
    </layout>
  </appender>

  <!-- Writes log events to a database table. -->
  <appender name="DATABASE" class="org.apache.log4j.jdbc.JDBCAppender">
    <param name="URL" value="jdbc:mysql://127.0.0.1:3306/webmagic?characterEncoding=UTF-8"/>
    <param name="driver" value="com.mysql.jdbc.Driver"/>
    <param name="user" value="root"/>
    <!-- The password is read from the system property ossean.log.db.password.
         Do not commit the real value. -->
    <param name="password" value="${ossean.log.db.password}"/>
    <!-- NOTE(review): the message text is placed between quotes without
         escaping, so a message containing a single quote breaks the statement.
         Review before enabling this appender. -->
    <param name="sql" value="INSERT INTO log4j(stamp,thread,info_level,class,message,logger) VALUES ('%d{yyyy-MM-dd HH:mm:ss}','%t','%p','%c','%m','%l')"/>
    <!-- <layout class="org.apache.log4j.PatternLayout">
    <param name="ConversionPattern" value="INSERT INTO log4j(stamp,thread,info_level,class,message,logger) VALUES ('%d{yyyy-MM-dd HH:mm:ss}','%t','%.50p','%.50c','%.1000m','%.50l')" />
    </layout>-->
    <!-- Only events whose level lies between LevelMin and LevelMax pass;
         with the values below that is DEBUG through FATAL. -->
    <filter class="org.apache.log4j.varia.LevelRangeFilter">
      <param name="LevelMin" value="DEBUG"/>
      <param name="LevelMax" value="FATAL"/>
    </filter>
  </appender>

  <!-- org.apache loggers: WARN and above, console only, not passed to root. -->
  <logger name="org.apache" additivity="false">
    <level value="warn" />
    <appender-ref ref="stdout" />
  </logger>

  <root>
    <level value="info" />
    <appender-ref ref="stdout" />
    <appender-ref ref="file" />
    <appender-ref ref="file_log" />
    <!-- <appender-ref ref="MAIL" />-->
    <!-- <appender-ref ref="DATABASE" /> -->
  </root>

</log4j:configuration>

View File

@ -0,0 +1,41 @@
package hotwords;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import javax.annotation.Resource;
import com.ossean.hotwords.dao.HotwordsDao;
import com.ossean.hotwords.model.Hotwords;
import com.ossean.hotwords.utils.MapSort;
import com.ossean.hotwords.utils.StringHandler;
/**
 * Scratch entry point used for manual experiments with the hot-word helpers.
 * The active code only builds a {@code Hotwords} object and discards it.
 */
public class Test {

    /**
     * Builds one sample {@code Hotwords} instance; nothing is printed or stored.
     *
     * @param args ignored
     */
    public static void main(String[] args){
        // Disabled experiment: parse a tag string into a list.
        // String str = "<tag1>,<tag2>,<tag3>";
        // List<String> result = StringHandler.changeTagsToTagList(str);
        // System.out.println("..." + result.get(0));

        // Disabled experiment: sort a map by value.
        // Map<String, Float> map = new HashMap<String, Float>();
        // map.put("tag1", (float) 15.30);
        // map.put("tag2", (float) 10.33);
        // map.put("tag3", (float) 20.66);
        // Map<String, Float> result = MapSort.sort(map);
        // System.out.println("afdsfdsf");

        // Disabled experiment: join a tag list into a tag string.
        // List<String> tagList = new ArrayList<String>();
        // tagList.add("tag1");
        // tagList.add("tag2");
        // tagList.add("tag3");
        // String tags = StringHandler.changeTagListToTags(tagList);
        // System.out.println(tags);

        Hotwords sample = new Hotwords();
        sample.setOsp_id(1);
        sample.setName("Nigel");
        sample.setWeight(20.0f);
    }
}

View File

@ -0,0 +1,77 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE log4j:configuration SYSTEM "log4j.dtd">
<!--
  log4j 1.x logging configuration.
  Appenders referenced from <root>: stdout, file, file_log.
  Appenders declared but not referenced: MAIL, DATABASE.
-->
<log4j:configuration xmlns:log4j="http://jakarta.apache.org/log4j/">

  <!-- Console output, limited to ERROR and above. -->
  <appender name="stdout" class="org.apache.log4j.ConsoleAppender">
    <!-- threshold must be set on the appender: PatternLayout has no such
         property, so placing it inside <layout> has no effect. -->
    <param name="threshold" value="ERROR" />
    <layout class="org.apache.log4j.PatternLayout">
      <param name="ConversionPattern" value="%d{yy-MM-dd HH:mm:ss,SSS} %-5p %c(%F:%L) ## %m%n" />
    </layout>
  </appender>

  <!-- Daily rolling file that receives ERROR and above. -->
  <appender name="file" class="org.apache.log4j.DailyRollingFileAppender">
    <param name="File" value="./log/error.log" />
    <param name="threshold" value="ERROR" />
    <layout class="org.apache.log4j.PatternLayout">
      <param name="ConversionPattern" value="%d{yy-MM-dd HH:mm:ss,SSS} %-5p %c(%F:%L) ## %m%n" />
    </layout>
  </appender>

  <!-- Daily rolling file without a threshold of its own. -->
  <appender name="file_log" class="org.apache.log4j.DailyRollingFileAppender">
    <param name="File" value="./log/webmagic.log" />
    <layout class="org.apache.log4j.PatternLayout">
      <param name="ConversionPattern" value="%d{yy-MM-dd HH:mm:ss,SSS} %-5p %c(%F:%L) ## %m%n" />
    </layout>
  </appender>

  <!-- E-mail alerts. With SMTPAppender's default trigger a mail is sent only
       when an event of level ERROR or higher arrives. -->
  <appender name="MAIL" class="org.apache.log4j.net.SMTPAppender">
    <param name="threshold" value="debug" />
    <!-- Stricter alternative: <param name="threshold" value="fatal"/> -->
    <!-- BufferSize is the number of recent logging events kept for the mail.
         It is a count of events, not a size in KB. -->
    <param name="BufferSize" value="1" />
    <param name="From" value="ossean_debug@163.com" />
    <param name="SMTPHost" value="smtp.163.com" />
    <param name="Subject" value="ossean-crawler-debug-log4jMessage" />
    <param name="To" value="gcm3651@126.com" />
    <param name="SMTPUsername" value="ossean_debug" />
    <!-- The password is read from the system property ossean.smtp.password
         (for example -Dossean.smtp.password=...). Do not commit the real
         value. A password that was committed earlier should be rotated. -->
    <param name="SMTPPassword" value="${ossean.smtp.password}" />
    <layout class="org.apache.log4j.PatternLayout">
      <param name="ConversionPattern" value="%-d{yyyy-MM-dd HH:mm:ss.SSS} [%p]-[%c] %m%n" />
    </layout>
  </appender>

  <!-- Writes log events to a database table. -->
  <appender name="DATABASE" class="org.apache.log4j.jdbc.JDBCAppender">
    <param name="URL" value="jdbc:mysql://127.0.0.1:3306/webmagic?characterEncoding=UTF-8"/>
    <param name="driver" value="com.mysql.jdbc.Driver"/>
    <param name="user" value="root"/>
    <!-- The password is read from the system property ossean.log.db.password.
         Do not commit the real value. -->
    <param name="password" value="${ossean.log.db.password}"/>
    <!-- NOTE(review): the message text is placed between quotes without
         escaping, so a message containing a single quote breaks the statement.
         Review before enabling this appender. -->
    <param name="sql" value="INSERT INTO log4j(stamp,thread,info_level,class,message,logger) VALUES ('%d{yyyy-MM-dd HH:mm:ss}','%t','%p','%c','%m','%l')"/>
    <!-- <layout class="org.apache.log4j.PatternLayout">
    <param name="ConversionPattern" value="INSERT INTO log4j(stamp,thread,info_level,class,message,logger) VALUES ('%d{yyyy-MM-dd HH:mm:ss}','%t','%.50p','%.50c','%.1000m','%.50l')" />
    </layout>-->
    <!-- Only events whose level lies between LevelMin and LevelMax pass;
         with the values below that is DEBUG through FATAL. -->
    <filter class="org.apache.log4j.varia.LevelRangeFilter">
      <param name="LevelMin" value="DEBUG"/>
      <param name="LevelMax" value="FATAL"/>
    </filter>
  </appender>

  <!-- org.apache loggers: WARN and above, console only, not passed to root. -->
  <logger name="org.apache" additivity="false">
    <level value="warn" />
    <appender-ref ref="stdout" />
  </logger>

  <root>
    <level value="info" />
    <appender-ref ref="stdout" />
    <appender-ref ref="file" />
    <appender-ref ref="file_log" />
    <!-- <appender-ref ref="MAIL" />-->
    <!-- <appender-ref ref="DATABASE" /> -->
  </root>

</log4j:configuration>