add ossean_tag to the branch
This commit is contained in:
parent
489737bb85
commit
192bff31fe
|
@ -1 +0,0 @@
|
|||
Subproject commit cd55a2a26844916e8882517932f995ba78e80050
|
|
@ -0,0 +1,18 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<classpath>
|
||||
<classpathentry kind="src" path="src/main/java"/>
|
||||
<classpathentry kind="src" path="src/main/assembly"/>
|
||||
<classpathentry kind="src" path="src/main/resources"/>
|
||||
<classpathentry kind="src" path="src/test/java"/>
|
||||
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/J2SE-1.5">
|
||||
<attributes>
|
||||
<attribute name="maven.pomderived" value="true"/>
|
||||
</attributes>
|
||||
</classpathentry>
|
||||
<classpathentry kind="con" path="org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER">
|
||||
<attributes>
|
||||
<attribute name="maven.pomderived" value="true"/>
|
||||
</attributes>
|
||||
</classpathentry>
|
||||
<classpathentry kind="output" path="target/classes"/>
|
||||
</classpath>
|
|
@ -0,0 +1,6 @@
|
|||
/target/
|
||||
#.project
|
||||
#/.settings
|
||||
/lib/
|
||||
/procedure_for_tag
|
||||
/log/*
|
|
@ -0,0 +1,23 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<projectDescription>
|
||||
<name>procedure_for_tag</name>
|
||||
<comment></comment>
|
||||
<projects>
|
||||
</projects>
|
||||
<buildSpec>
|
||||
<buildCommand>
|
||||
<name>org.eclipse.jdt.core.javabuilder</name>
|
||||
<arguments>
|
||||
</arguments>
|
||||
</buildCommand>
|
||||
<buildCommand>
|
||||
<name>org.eclipse.m2e.core.maven2Builder</name>
|
||||
<arguments>
|
||||
</arguments>
|
||||
</buildCommand>
|
||||
</buildSpec>
|
||||
<natures>
|
||||
<nature>org.eclipse.jdt.core.javanature</nature>
|
||||
<nature>org.eclipse.m2e.core.maven2Nature</nature>
|
||||
</natures>
|
||||
</projectDescription>
|
|
@ -0,0 +1,5 @@
|
|||
eclipse.preferences.version=1
|
||||
org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.5
|
||||
org.eclipse.jdt.core.compiler.compliance=1.5
|
||||
org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
|
||||
org.eclipse.jdt.core.compiler.source=1.5
|
|
@ -0,0 +1,4 @@
|
|||
activeProfiles=
|
||||
eclipse.preferences.version=1
|
||||
resolveWorkspaceProjects=true
|
||||
version=1
|
|
@ -0,0 +1,36 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<beans xmlns="http://www.springframework.org/schema/beans"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:context="http://www.springframework.org/schema/context"
|
||||
xmlns:mvc="http://www.springframework.org/schema/mvc"
|
||||
xsi:schemaLocation="http://www.springframework.org/schema/mvc
|
||||
http://www.springframework.org/schema/mvc/spring-mvc-3.0.xsd
|
||||
http://www.springframework.org/schema/beans
|
||||
http://www.springframework.org/schema/beans/spring-beans-3.0.xsd
|
||||
http://www.springframework.org/schema/context
|
||||
http://www.springframework.org/schema/context/spring-context-3.0.xsd">
|
||||
<context:annotation-config />
|
||||
|
||||
<!-- c3p0连接池配置 -->
|
||||
<bean id="dataSource" class="com.mchange.v2.c3p0.ComboPooledDataSource">
|
||||
<!-- 用户名 -->
|
||||
<property name="user" value="influx" />
|
||||
<!-- 用户密码 -->
|
||||
<property name="password" value="influx1234" />
|
||||
<property name="driverClass" value="com.mysql.jdbc.Driver" />
|
||||
<property name="jdbcUrl"
|
||||
value="jdbc:mysql://192.168.80.104:3306/buffer_test?useUnicode=true&amp;characterEncoding=utf-8" />
|
||||
|
||||
<!--连接池中保留的最大连接数。默认值: 15 -->
|
||||
<property name="maxPoolSize" value="20" />
|
||||
<!-- 连接池中保留的最小连接数,默认为:3 -->
|
||||
<property name="minPoolSize" value="2" />
|
||||
<!-- 初始化连接池中的连接数,取值应在minPoolSize与maxPoolSize之间,默认为3 -->
|
||||
<property name="initialPoolSize" value="5" />
|
||||
|
||||
<!--最大空闲时间,60秒内未使用则连接被丢弃。若为0则永不丢弃。默认值: 0 -->
|
||||
<property name="maxIdleTime" value='60' />
|
||||
<property name="maxStatements" value='100' />
|
||||
<property name="maxStatementsPerConnection" value='20' />
|
||||
</bean>
|
||||
<!--context:component-scan base-package="net.trustie.webmagic.dao"/ -->
|
||||
</beans>
|
|
@ -0,0 +1,39 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE log4j:configuration SYSTEM "log4j.dtd">
|
||||
<log4j:configuration xmlns:log4j="http://jakarta.apache.org/log4j/">
|
||||
|
||||
<appender name="stdout" class="org.apache.log4j.ConsoleAppender">
|
||||
<layout class="org.apache.log4j.PatternLayout">
|
||||
<param name="ConversionPattern" value="%d{yy-MM-dd HH:mm:ss,SSS} %-5p %c(%F:%L) ## %m%n" />
|
||||
</layout>
|
||||
</appender>
|
||||
|
||||
<appender name="MAIL" class="org.apache.log4j.net.SMTPAppender">
|
||||
<param name="threshold" value="info" />
|
||||
<param name="BufferSize" value="512" />
|
||||
<param name="From" value="gcm365111@126.com" />
|
||||
<param name="SMTPHost" value="SMTP.126.com" />
|
||||
<param name="Subject" value="this is test" />
|
||||
<param name="SMTPUsername" value="gcm365111@126.com" />
|
||||
<param name="SMTPPassword" value="03023651gcm" />
|
||||
<param name="to" value="gcm3651@126.com" />
|
||||
<layout class="org.apache.log4j.PatternLayout">
|
||||
<param name="ConversionPattern" value="%d{yy-MM-dd HH:mm:ss,SSS} %-5p %c(%F:%L) ## %m%n" />
|
||||
</layout>
|
||||
</appender>
|
||||
|
||||
<appender name="file" class="org.apache.log4j.DailyRollingFileAppender">
|
||||
<param name="File" value="./log/info.log" />
|
||||
<layout class="org.apache.log4j.PatternLayout">
|
||||
<param name="ConversionPattern" value="%d{yy-MM-dd HH:mm:ss,SSS} %-5p %c(%F:%L) ## %m%n" />
|
||||
</layout>
|
||||
</appender>
|
||||
|
||||
|
||||
|
||||
<root>
|
||||
<level value="info" />
|
||||
<appender-ref ref="file" />
|
||||
</root>
|
||||
|
||||
</log4j:configuration>
|
|
@ -0,0 +1,21 @@
|
|||
#!/bin/bash
# Starts com.alan.myfunction.ProcedureTag in the background, appending its
# stdout and stderr to ./info.log.

# Delete resource files that the build copied into target/classes.
# NOTE(review): presumably this makes the copies under ./bin/resources the
# ones that are loaded from the classpath -- confirm.
# "-exec rm -f {} +" is used instead of "| xargs rm -f" so that file names
# containing whitespace are handled correctly.
find ./target/classes \( -name "*.properties" -o -name "*.xml" -o -name "*.dic" \) -exec rm -f {} +

# Classpath order: fat jar, then compiled classes, then external resources,
# followed by whatever CLASSPATH the caller already exported.
tmp='./bin/resources'
tmp="./target/classes:$tmp"
tmp="./target/procedure_for_tag-0.0.1-SNAPSHOT-jar-with-dependencies-without-resources.jar:$tmp"

CLASSPATH="$tmp:$CLASSPATH"

echo "$CLASSPATH"

java -classpath "$CLASSPATH" com.alan.myfunction.ProcedureTag >>info.log 2>&1 &
|
|
@ -0,0 +1,98 @@
|
|||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<properties>
|
||||
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
|
||||
</properties>
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
<groupId>org.kg</groupId>
|
||||
<artifactId>procedure_for_tag</artifactId>
|
||||
<version>0.0.1-SNAPSHOT</version>
|
||||
<name>procedure_for_tag</name>
|
||||
|
||||
<build>
|
||||
<sourceDirectory>${basedir}/src/main/java</sourceDirectory>
|
||||
<resources>
|
||||
<resource>
|
||||
<directory>${basedir}/src/main/resources</directory>
|
||||
<excludes>
|
||||
<exclude>*.xml</exclude>
|
||||
<exclude>*.properties</exclude>
|
||||
</excludes>
|
||||
</resource>
|
||||
</resources>
|
||||
<plugins>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-compiler-plugin</artifactId>
|
||||
<configuration>
|
||||
<source>1.7</source>
|
||||
<target>1.7</target>
|
||||
</configuration>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<artifactId>maven-assembly-plugin</artifactId>
|
||||
<version>2.5.1</version>
|
||||
<configuration>
|
||||
<archive>
|
||||
<manifest>
|
||||
<mainClass>com.alan.myfunction.ProcedureTag</mainClass>
|
||||
</manifest>
|
||||
</archive>
|
||||
<descriptors>
|
||||
<descriptor>src/main/assembly/assembly.xml</descriptor>
|
||||
</descriptors>
|
||||
</configuration>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
|
||||
<dependencies>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.springframework</groupId>
|
||||
<artifactId>spring-context</artifactId>
|
||||
<version>4.1.2.RELEASE</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>c3p0</groupId>
|
||||
<artifactId>c3p0</artifactId>
|
||||
<version>0.9.1.2</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>junit</groupId>
|
||||
<artifactId>junit</artifactId>
|
||||
<version>4.0</version>
|
||||
<type>jar</type>
|
||||
<scope>test</scope>
|
||||
<optional>true</optional>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.quartz-scheduler</groupId>
|
||||
<artifactId>quartz</artifactId>
|
||||
<version>1.8.4</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>mysql</groupId>
|
||||
<artifactId>mysql-connector-java</artifactId>
|
||||
<version>5.1.30</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>commons-lang</groupId>
|
||||
<artifactId>commons-lang</artifactId>
|
||||
<version>2.6</version>
|
||||
</dependency>
|
||||
|
||||
|
||||
<dependency>
|
||||
<groupId>org.slf4j</groupId>
|
||||
<artifactId>slf4j-log4j12</artifactId>
|
||||
<version>1.7.7</version>
|
||||
</dependency>
|
||||
|
||||
</dependencies>
|
||||
</project>
|
|
@ -0,0 +1,15 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<module org.jetbrains.idea.maven.project.MavenProjectsManager.isMavenModule="true" type="JAVA_MODULE" version="4">
|
||||
<component name="NewModuleRootManager" LANGUAGE_LEVEL="JDK_1_5" inherit-compiler-output="false">
|
||||
<output url="file://$MODULE_DIR$/target/classes" />
|
||||
<output-test url="file://$MODULE_DIR$/target/test-classes" />
|
||||
<content url="file://$MODULE_DIR$">
|
||||
<sourceFolder url="file://$MODULE_DIR$/src/main/java" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/src/main/resources" type="java-resource" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/src/test/java" isTestSource="true" />
|
||||
<excludeFolder url="file://$MODULE_DIR$/target" />
|
||||
</content>
|
||||
<orderEntry type="inheritedJdk" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
</component>
|
||||
</module>
|
|
@ -0,0 +1,19 @@
|
|||
<assembly
|
||||
xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.2"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.2 http://maven.apache.org/xsd/assembly-1.1.2.xsd">
|
||||
<!-- TODO: a jarjar format would be better -->
|
||||
<id>jar-with-dependencies-without-resources</id>
|
||||
<formats>
|
||||
<format>jar</format>
|
||||
</formats>
|
||||
<includeBaseDirectory>false</includeBaseDirectory>
|
||||
<dependencySets>
|
||||
<dependencySet>
|
||||
<outputDirectory>/</outputDirectory>
|
||||
<useProjectArtifact>false</useProjectArtifact>
|
||||
<unpack>true</unpack>
|
||||
<scope>runtime</scope>
|
||||
</dependencySet>
|
||||
</dependencySets>
|
||||
</assembly>
|
|
@ -0,0 +1,223 @@
|
|||
package com.alan.myfunction;
|
||||
|
||||
import java.sql.Connection;
|
||||
import java.sql.Date;
|
||||
import java.sql.PreparedStatement;
|
||||
import java.sql.ResultSet;
|
||||
import java.sql.SQLException;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
public class GenerateTags {
|
||||
private Logger logger = LoggerFactory.getLogger(getClass());
|
||||
public final static int batchSize = 5000;
|
||||
SQLConnection sqlconn = null;
|
||||
String tableName = "";
|
||||
String colName = "";
|
||||
|
||||
public GenerateTags(String tableName, String colName) {
|
||||
this.sqlconn = SQLConnection.getInstance();
|
||||
this.tableName = tableName;
|
||||
this.colName = colName;
|
||||
|
||||
this.initRecord(this.tableName);
|
||||
|
||||
// this.initRecord(this.ospHlpTable);
|
||||
}
|
||||
|
||||
public void extract() {
|
||||
// tagMatchRelativeMemo();
|
||||
extractFromTKnowledge();
|
||||
// tagMatchOSP();
|
||||
}
|
||||
|
||||
// 从t_knowledge中抽取
|
||||
private void extractFromTKnowledge() {
|
||||
Connection conn = this.sqlconn.getConnection();
|
||||
|
||||
try {
|
||||
conn.setAutoCommit(false);
|
||||
} catch (SQLException e) {
|
||||
// TODO Auto-generated catch block
|
||||
logger.info(e.getStackTrace().toString());
|
||||
}
|
||||
|
||||
int maxId = getMaxId(conn, this.tableName);
|
||||
int minId = getMinId(conn, this.tableName);
|
||||
// int offset = 0;
|
||||
ResultSet rs = null;
|
||||
int lower = minId;
|
||||
int upper = minId + GenerateTags.batchSize;
|
||||
while (lower < maxId) {
|
||||
rs = getBatch(conn, this.tableName, this.colName, lower, upper);
|
||||
lower = lower + TagMatch.batchSize;
|
||||
upper = upper + TagMatch.batchSize;
|
||||
|
||||
if (rs != null) {
|
||||
batchExtract(rs, conn, this.tableName);
|
||||
}
|
||||
logger.info("extracted tags from t_knowledge: " + lower + "-->"
|
||||
+ upper);
|
||||
}
|
||||
|
||||
// type: 0-->osp, 1-->memo
|
||||
try {
|
||||
conn.close();
|
||||
} catch (SQLException e) {
|
||||
// TODO Auto-generated catch block
|
||||
logger.info(e.getStackTrace().toString());
|
||||
// e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
// 取得要匹配的最大id
|
||||
private int getMaxId(Connection conn, String tableName) {
|
||||
int rt = 0;
|
||||
String sql = "SELECT MAX(id) AS num FROM " + tableName;
|
||||
try {
|
||||
PreparedStatement pst = conn.prepareStatement(sql);
|
||||
// pst.setString(1, type);
|
||||
// pst.set
|
||||
ResultSet rs = pst.executeQuery();
|
||||
rs.next();
|
||||
rt = rs.getInt("num");
|
||||
pst.close();
|
||||
rs.close();
|
||||
} catch (SQLException e) {
|
||||
// TODO Auto-generated catch block
|
||||
e.printStackTrace();
|
||||
}
|
||||
|
||||
return rt;
|
||||
}
|
||||
|
||||
// 取得需要匹配的最小id
|
||||
private int getMinId(Connection conn, String tableName) {
|
||||
int rt = 0;
|
||||
String recordName = tableName + "_for_tag";
|
||||
String sql = "SELECT insert_id from t_records WHERE insert_table = ?";
|
||||
try {
|
||||
PreparedStatement pst = conn.prepareStatement(sql);
|
||||
pst.setString(1, recordName);
|
||||
ResultSet rs = pst.executeQuery();
|
||||
rs.next();
|
||||
rt = rs.getInt("insert_id");
|
||||
pst.close();
|
||||
rs.close();
|
||||
} catch (SQLException e) {
|
||||
// TODO Auto-generated catch block
|
||||
logger.info(e.getStackTrace().toString());
|
||||
}
|
||||
return rt;
|
||||
}
|
||||
|
||||
// 取得一批需要匹配的条目
|
||||
private ResultSet getBatch(Connection conn, String tableName,
|
||||
String colName, int lower, int upper) {
|
||||
ResultSet rt = null;
|
||||
String sql = "SELECT id, " + colName + " AS tags from " + tableName
|
||||
+ " WHERE id > ? AND id <= ?";
|
||||
try {
|
||||
PreparedStatement pst = conn.prepareStatement(sql);
|
||||
// pst.setString(1, col);
|
||||
// pst.setString(2, table);
|
||||
pst.setInt(1, lower);
|
||||
pst.setInt(2, upper);
|
||||
rt = pst.executeQuery();
|
||||
// pst.close();
|
||||
} catch (SQLException e) {
|
||||
// TODO Auto-generated catch block
|
||||
logger.info(e.getStackTrace().toString());
|
||||
}
|
||||
|
||||
return rt;
|
||||
}
|
||||
|
||||
// 匹配一批 条目
|
||||
private void batchExtract(ResultSet rs, Connection conn, String tableName) {
|
||||
String rawTags = "";
|
||||
int cursor = 0;
|
||||
int itemId = 0;
|
||||
String sql = "INSERT IGNORE INTO tags (name) VALUES " + "(?)";
|
||||
try {
|
||||
PreparedStatement pst = conn.prepareStatement(sql);
|
||||
while (rs.next()) {
|
||||
cursor = rs.getInt("id");
|
||||
rawTags = rs.getString("tags");
|
||||
// System.out.println(rawTags);
|
||||
|
||||
String regx = "<(.*?)>";
|
||||
Pattern pattern = Pattern.compile(regx);
|
||||
Matcher matcher;
|
||||
|
||||
if ((rawTags != null)
|
||||
&& !("".equals(rawTags == null ? null : rawTags.trim()))) {
|
||||
// resultTags = resultTags.replace(" ", "");
|
||||
matcher = pattern.matcher(rawTags);
|
||||
while (matcher.find()) {
|
||||
if (matcher.group(1) != "") {
|
||||
pst.setString(1,
|
||||
matcher.group(1).trim().replace(" ", "")
|
||||
.toLowerCase());
|
||||
|
||||
pst.addBatch();
|
||||
}
|
||||
}
|
||||
// System.out.println(rs.getString(1));
|
||||
}
|
||||
}
|
||||
pst.executeBatch();
|
||||
conn.commit();
|
||||
|
||||
String recordName = tableName + "_for_tag";
|
||||
String updateSql = "UPDATE t_records SET insert_id = ? WHERE insert_table = ?";
|
||||
pst = conn.prepareStatement(updateSql);
|
||||
pst.setInt(1, cursor);
|
||||
pst.setString(2, recordName);
|
||||
pst.executeUpdate();
|
||||
conn.commit();
|
||||
pst.close();
|
||||
System.out.println(recordName + ": " + cursor);
|
||||
} catch (SQLException e) {
|
||||
// TODO Auto-generated catch block
|
||||
logger.info(e.getStackTrace().toString());
|
||||
}
|
||||
}
|
||||
|
||||
// 初始化记录指针表
|
||||
private void initRecord(String table) {
|
||||
Connection conn = this.sqlconn.getConnection();
|
||||
String recordName = table + "_for_tag";
|
||||
ResultSet rs = null;
|
||||
String sql = "SELECT * from t_records where insert_table = ?";
|
||||
try {
|
||||
conn.setAutoCommit(false);
|
||||
PreparedStatement pst = conn.prepareStatement(sql);
|
||||
pst.setString(1, recordName);
|
||||
rs = pst.executeQuery();
|
||||
if (!rs.next()) {
|
||||
String initSql = "INSERT INTO t_records (insert_table, insert_id) VALUES (?, ?)";
|
||||
pst = conn.prepareStatement(initSql);
|
||||
pst.setString(1, recordName);
|
||||
pst.setInt(2, 0);
|
||||
pst.executeUpdate();
|
||||
}
|
||||
conn.commit();
|
||||
pst.close();
|
||||
} catch (SQLException e) {
|
||||
// TODO Auto-generated catch block
|
||||
logger.info(e.getStackTrace().toString());
|
||||
}
|
||||
|
||||
try {
|
||||
conn.close();
|
||||
} catch (SQLException e) {
|
||||
// TODO Auto-generated catch block
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,30 @@
|
|||
package com.alan.myfunction;
|
||||
|
||||
import java.sql.SQLException;
|
||||
|
||||
|
||||
public class ProcedureTag {
|
||||
public static void main(String[] args) throws SQLException {
|
||||
|
||||
|
||||
while (true) {
|
||||
//MyJDBC.myexcute("t_knowledge");
|
||||
//GenerateTags gt = new GenerateTags("relative_memo_info_help", "tags");
|
||||
GenerateTags gt = new GenerateTags("t_knowledge", "tags");
|
||||
//从t_knowledge中分离中tag来存到tags中
|
||||
gt.extract();
|
||||
|
||||
|
||||
TagMatch tm = new TagMatch("relative_memo_info_help", "rm_id", "open_source_project_help", "osp_id");
|
||||
// 从relative_memo_info_help中取出记录与tags中 的记录匹配
|
||||
tm.match();
|
||||
|
||||
try {
|
||||
Thread.sleep(60000);
|
||||
} catch (InterruptedException e) {
|
||||
// TODO Auto-generated catch block
|
||||
e.printStackTrace();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,60 @@
|
|||
package com.alan.myfunction;
|
||||
|
||||
import java.beans.PropertyVetoException;
|
||||
import java.sql.Connection;
|
||||
import java.sql.SQLException;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.context.ApplicationContext;
|
||||
import org.springframework.context.support.ClassPathXmlApplicationContext;
|
||||
import org.springframework.context.support.FileSystemXmlApplicationContext;
|
||||
import org.springframework.core.io.FileSystemResource;
|
||||
import org.springframework.core.io.Resource;
|
||||
|
||||
import com.mchange.v2.c3p0.ComboPooledDataSource;
|
||||
|
||||
public class SQLConnection {
|
||||
private Logger logger = LoggerFactory.getLogger(getClass());
|
||||
private static SQLConnection dbcputils = null;
|
||||
private ComboPooledDataSource cpds = null;
|
||||
|
||||
private SQLConnection() {
|
||||
if (cpds == null) {
|
||||
// ApplicationContext applicationContext = new
|
||||
// ClassPathXmlApplicationContext("applicationContext.xml");
|
||||
ApplicationContext applicationContext = new ClassPathXmlApplicationContext(
|
||||
"classpath:applicationContext.xml");
|
||||
this.cpds = (ComboPooledDataSource) applicationContext
|
||||
.getBean("dataSource");
|
||||
}
|
||||
}
|
||||
|
||||
public synchronized static SQLConnection getInstance() {
|
||||
if (dbcputils == null)
|
||||
dbcputils = new SQLConnection();
|
||||
return dbcputils;
|
||||
}
|
||||
|
||||
public Connection getConnection() {
|
||||
Connection con = null;
|
||||
try {
|
||||
con = cpds.getConnection();
|
||||
} catch (SQLException e) {
|
||||
// TODO Auto-generated catch block
|
||||
logger.info(e.getStackTrace().toString());
|
||||
}
|
||||
return con;
|
||||
}
|
||||
|
||||
public static void main(String[] args) throws SQLException {
|
||||
Connection con = null;
|
||||
long begin = System.currentTimeMillis();
|
||||
for (int i = 0; i < 1000000; i++) {
|
||||
con = SQLConnection.getInstance().getConnection();
|
||||
con.close();
|
||||
}
|
||||
long end = System.currentTimeMillis();
|
||||
System.out.println("<EFBFBD><EFBFBD>ʱΪ:" + (end - begin) + "ms");
|
||||
}
|
||||
}
|
|
@ -0,0 +1,325 @@
|
|||
package com.alan.myfunction;
|
||||
|
||||
import java.sql.Connection;
|
||||
import java.sql.Date;
|
||||
import java.sql.PreparedStatement;
|
||||
import java.sql.ResultSet;
|
||||
import java.sql.SQLException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.regex.Matcher;
|
||||
import java.util.regex.Pattern;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
public class TagMatch {
|
||||
private Logger logger = LoggerFactory.getLogger(getClass());
|
||||
public final static int batchSize = 5000;
|
||||
SQLConnection sqlconn = null;
|
||||
String memoHlpTable = "";
|
||||
String memoHlpColName = "";
|
||||
String ospHlpTable = "";
|
||||
String ospHlpColName = "";
|
||||
Map<String, Integer> tags = new HashMap<String, Integer>();
|
||||
|
||||
public TagMatch(String memoTable, String memoColName, String ospTable,
|
||||
String ospColName) {
|
||||
this.sqlconn = SQLConnection.getInstance();
|
||||
this.memoHlpTable = memoTable;
|
||||
this.memoHlpColName = memoColName;
|
||||
this.ospHlpTable = ospTable;
|
||||
this.ospHlpColName = ospColName;
|
||||
this.initTags();
|
||||
this.initRecord(this.memoHlpTable);
|
||||
// this.initRecord(this.ospHlpTable);
|
||||
}
|
||||
|
||||
public void match() {
|
||||
tagMatchRelativeMemo();
|
||||
// tagMatchOSP();
|
||||
}
|
||||
|
||||
// 分离帖子标签
|
||||
private void tagMatchRelativeMemo() {
|
||||
Connection conn = this.sqlconn.getConnection();
|
||||
|
||||
try {
|
||||
conn.setAutoCommit(false);
|
||||
} catch (SQLException e) {
|
||||
// TODO Auto-generated catch block
|
||||
logger.info(e.getStackTrace().toString());
|
||||
}
|
||||
|
||||
int maxId = getMaxId(conn, this.memoHlpTable);
|
||||
int minId = getMinId(conn, this.memoHlpTable);
|
||||
// int offset = 0;
|
||||
ResultSet rs = null;
|
||||
int lower = minId;
|
||||
int upper = minId + TagMatch.batchSize;
|
||||
while (lower < maxId) {
|
||||
rs = getBatch(conn, this.memoHlpTable, this.memoHlpColName, lower,
|
||||
upper);
|
||||
lower = lower + TagMatch.batchSize;
|
||||
upper = upper + TagMatch.batchSize;
|
||||
|
||||
// type: 0-->osp, 1-->memo
|
||||
if (rs != null) {
|
||||
batchMatch(rs, conn, this.memoHlpTable);
|
||||
|
||||
}
|
||||
logger.info("matched tags from relative_memo_help: " + lower
|
||||
+ "-->" + upper);
|
||||
}
|
||||
|
||||
try {
|
||||
conn.close();
|
||||
} catch (SQLException e) {
|
||||
// TODO Auto-generated catch block
|
||||
logger.info(e.getStackTrace().toString());
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// 分离项目标签
|
||||
private void tagMatchOSP() {
|
||||
Connection conn = this.sqlconn.getConnection();
|
||||
|
||||
try {
|
||||
conn.setAutoCommit(false);
|
||||
} catch (SQLException e) {
|
||||
// TODO Auto-generated catch block
|
||||
logger.info(e.getStackTrace().toString());
|
||||
}
|
||||
|
||||
int maxId = getMaxId(conn, this.ospHlpTable);
|
||||
int minId = getMinId(conn, this.ospHlpTable);
|
||||
int offset = 0;
|
||||
ResultSet rs = null;
|
||||
int lower = minId;
|
||||
int upper = minId + TagMatch.batchSize;
|
||||
while (lower < maxId) {
|
||||
rs = getBatch(conn, this.ospHlpTable, this.ospHlpColName, lower,
|
||||
upper);
|
||||
lower = lower + TagMatch.batchSize;
|
||||
upper = upper + TagMatch.batchSize;
|
||||
|
||||
if (rs != null) {
|
||||
batchMatch(rs, conn, this.ospHlpTable);
|
||||
}
|
||||
}
|
||||
|
||||
try {
|
||||
conn.close();
|
||||
} catch (SQLException e) {
|
||||
// TODO Auto-generated catch block
|
||||
logger.info(e.getStackTrace().toString());
|
||||
}
|
||||
}
|
||||
|
||||
// 取得要匹配的最大id
|
||||
private int getMaxId(Connection conn, String type) {
|
||||
int rt = 0;
|
||||
String sql = "SELECT MAX(id) AS num FROM " + type;
|
||||
try {
|
||||
PreparedStatement pst = conn.prepareStatement(sql);
|
||||
// pst.setString(1, type);
|
||||
// pst.set
|
||||
ResultSet rs = pst.executeQuery();
|
||||
rs.next();
|
||||
rt = rs.getInt("num");
|
||||
pst.close();
|
||||
rs.close();
|
||||
} catch (SQLException e) {
|
||||
// TODO Auto-generated catch block
|
||||
logger.info(e.getStackTrace().toString());
|
||||
}
|
||||
|
||||
return rt;
|
||||
}
|
||||
|
||||
// 取得需要匹配的最小id
|
||||
private int getMinId(Connection conn, String type) {
|
||||
int rt = 0;
|
||||
String sql = "SELECT insert_id from t_records WHERE insert_table = ?";
|
||||
try {
|
||||
PreparedStatement pst = conn.prepareStatement(sql);
|
||||
pst.setString(1, type);
|
||||
ResultSet rs = pst.executeQuery();
|
||||
rs.next();
|
||||
rt = rs.getInt("insert_id");
|
||||
pst.close();
|
||||
rs.close();
|
||||
} catch (SQLException e) {
|
||||
// TODO Auto-generated catch block
|
||||
logger.info(e.getStackTrace().toString());
|
||||
}
|
||||
|
||||
return rt;
|
||||
}
|
||||
|
||||
// 取得一批需要匹配的条目
|
||||
private ResultSet getBatch(Connection conn, String table, String col,
|
||||
int lower, int upper) {
|
||||
ResultSet rt = null;
|
||||
String sql = "SELECT id, " + col + " AS item_id, tags from " + table
|
||||
+ " WHERE id > ? AND id <= ?";
|
||||
try {
|
||||
PreparedStatement pst = conn.prepareStatement(sql);
|
||||
// pst.setString(1, col);
|
||||
// pst.setString(2, table);
|
||||
pst.setInt(1, lower);
|
||||
pst.setInt(2, upper);
|
||||
rt = pst.executeQuery();
|
||||
// pst.close();
|
||||
} catch (SQLException e) {
|
||||
// TODO Auto-generated catch block
|
||||
logger.info(e.getStackTrace().toString());
|
||||
}
|
||||
|
||||
return rt;
|
||||
}
|
||||
|
||||
// 匹配一批 条目
|
||||
private void batchMatch(ResultSet rs, Connection conn, String table) {
|
||||
String rawTag = "";
|
||||
int cursor = 0;
|
||||
int itemId = 0;
|
||||
String sql = "INSERT IGNORE INTO taggings (tag_id, taggable_id, taggable_type, context, created_at) VALUES "
|
||||
+ "(?, ?, ?, 'tags', ?)";
|
||||
try {
|
||||
PreparedStatement pst = conn.prepareStatement(sql);
|
||||
// pst.setString(4, "tags");
|
||||
pst.setDate(4, new java.sql.Date(new java.util.Date().getTime()));
|
||||
if ("relative_memo_info_help".equals(table)) {
|
||||
pst.setString(3, "RelativeMemo");
|
||||
} else if ("open_source_project".equals(table)) {
|
||||
pst.setString(3, "OpenSourceProject");
|
||||
} else {
|
||||
pst.setString(3, "Other");
|
||||
}
|
||||
|
||||
while (rs.next()) {
|
||||
cursor = rs.getInt("id");
|
||||
rawTag = rs.getString("tags");
|
||||
if (rawTag != null) {
|
||||
rawTag = rawTag.toLowerCase();
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
itemId = rs.getInt("item_id");
|
||||
pst.setInt(2, itemId);
|
||||
List<String> tagList = toList(rawTag);
|
||||
Iterator<String> it = tagList.iterator();
|
||||
while (it.hasNext()) {
|
||||
String tag = it.next();
|
||||
tag = tag.replace("'", "\\'");
|
||||
int tagId = 0;
|
||||
if (this.tags.containsKey(tag)) {
|
||||
tagId = this.tags.get(tag);
|
||||
}
|
||||
pst.setInt(1, tagId);
|
||||
pst.addBatch();
|
||||
}
|
||||
}
|
||||
|
||||
pst.executeBatch();
|
||||
conn.commit();
|
||||
|
||||
String updateSql = "UPDATE t_records SET insert_id = ? WHERE insert_table = ?";
|
||||
pst = conn.prepareStatement(updateSql);
|
||||
pst.setInt(1, cursor);
|
||||
pst.setString(2, table);
|
||||
pst.executeUpdate();
|
||||
conn.commit();
|
||||
pst.close();
|
||||
System.out.println(table + ": " + cursor);
|
||||
} catch (SQLException e) {
|
||||
// TODO Auto-generated catch block
|
||||
logger.info(e.getStackTrace().toString());
|
||||
}
|
||||
}
|
||||
|
||||
// 字符串标签变链表
|
||||
private List<String> toList(String tags) {
|
||||
List<String> rt = new ArrayList<String>();
|
||||
|
||||
String regx = "<(.*?)>";
|
||||
Pattern pattern = Pattern.compile(regx);
|
||||
Matcher matcher;
|
||||
|
||||
// resultTags = resultTags.replace(" ", "");
|
||||
matcher = pattern.matcher(tags);
|
||||
while (matcher.find()) {
|
||||
if (matcher.group(1) != "") {
|
||||
rt.add(matcher.group(1).trim().replace(" ", ""));
|
||||
}
|
||||
}
|
||||
// System.out.println(rs.getString(1));
|
||||
|
||||
return rt;
|
||||
}
|
||||
|
||||
// 初始化标签,将标签读到内存中
|
||||
private void initTags() {
|
||||
Connection conn = this.sqlconn.getConnection();
|
||||
ResultSet rs = null;
|
||||
String sql = "SELECT id, name from tags";
|
||||
try {
|
||||
PreparedStatement pst = conn.prepareStatement(sql);
|
||||
rs = pst.executeQuery();
|
||||
while (rs.next()) {
|
||||
String name = rs.getString("name");
|
||||
Integer id = rs.getInt("id");
|
||||
this.tags.put(name, id);
|
||||
}
|
||||
pst.close();
|
||||
} catch (SQLException e) {
|
||||
// TODO Auto-generated catch block
|
||||
logger.info(e.getStackTrace().toString());
|
||||
}
|
||||
|
||||
try {
|
||||
conn.close();
|
||||
} catch (SQLException e) {
|
||||
// TODO Auto-generated catch block
|
||||
logger.info(e.getStackTrace().toString());
|
||||
}
|
||||
}
|
||||
|
||||
// 初始化记录指针表
|
||||
private void initRecord(String table) {
|
||||
Connection conn = this.sqlconn.getConnection();
|
||||
ResultSet rs = null;
|
||||
String sql = "SELECT * from t_records where insert_table = ?";
|
||||
try {
|
||||
PreparedStatement pst = conn.prepareStatement(sql);
|
||||
pst.setString(1, table);
|
||||
rs = pst.executeQuery();
|
||||
if (!rs.next()) {
|
||||
String initSql = "INSERT INTO t_records (insert_table, insert_id) VALUES (?, ?)";
|
||||
pst = conn.prepareStatement(initSql);
|
||||
pst.setString(1, table);
|
||||
pst.setInt(2, 0);
|
||||
pst.executeUpdate();
|
||||
}
|
||||
pst.close();
|
||||
} catch (SQLException e) {
|
||||
// TODO Auto-generated catch block
|
||||
logger.info(e.getStackTrace().toString());
|
||||
}
|
||||
|
||||
try {
|
||||
conn.close();
|
||||
} catch (SQLException e) {
|
||||
// TODO Auto-generated catch block
|
||||
logger.info(e.getStackTrace().toString());
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,36 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<beans xmlns="http://www.springframework.org/schema/beans"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:context="http://www.springframework.org/schema/context"
|
||||
xmlns:mvc="http://www.springframework.org/schema/mvc"
|
||||
xsi:schemaLocation="http://www.springframework.org/schema/mvc
|
||||
http://www.springframework.org/schema/mvc/spring-mvc-3.0.xsd
|
||||
http://www.springframework.org/schema/beans
|
||||
http://www.springframework.org/schema/beans/spring-beans-3.0.xsd
|
||||
http://www.springframework.org/schema/context
|
||||
http://www.springframework.org/schema/context/spring-context-3.0.xsd">
|
||||
<context:annotation-config />
|
||||
|
||||
<!-- c3p0连接池配置 -->
|
||||
<bean id="dataSource" class="com.mchange.v2.c3p0.ComboPooledDataSource">
|
||||
<!-- 用户名 -->
|
||||
<property name="user" value="ossean" />
|
||||
<!-- 用户密码 -->
|
||||
<property name="password" value="1234" />
|
||||
<property name="driverClass" value="com.mysql.jdbc.Driver" />
|
||||
<property name="jdbcUrl"
|
||||
value="jdbc:mysql://192.168.120.129:3306/buffer_fix?useUnicode=true&amp;characterEncoding=utf-8" />
|
||||
|
||||
<!--连接池中保留的最大连接数。默认值: 15 -->
|
||||
<property name="maxPoolSize" value="20" />
|
||||
<!-- 连接池中保留的最小连接数,默认为:3 -->
|
||||
<property name="minPoolSize" value="2" />
|
||||
<!-- 初始化连接池中的连接数,取值应在minPoolSize与maxPoolSize之间,默认为3 -->
|
||||
<property name="initialPoolSize" value="5" />
|
||||
|
||||
<!--最大空闲时间,60秒内未使用则连接被丢弃。若为0则永不丢弃。默认值: 0 -->
|
||||
<property name="maxIdleTime" value='60' />
|
||||
<property name="maxStatements" value='200' />
|
||||
<property name="maxStatementsPerConnection" value='30' />
|
||||
</bean>
|
||||
<!--context:component-scan base-package="net.trustie.webmagic.dao"/ -->
|
||||
</beans>
|
|
@ -0,0 +1,39 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE log4j:configuration SYSTEM "log4j.dtd">
|
||||
<log4j:configuration xmlns:log4j="http://jakarta.apache.org/log4j/">
|
||||
|
||||
<appender name="stdout" class="org.apache.log4j.ConsoleAppender">
|
||||
<layout class="org.apache.log4j.PatternLayout">
|
||||
<param name="ConversionPattern" value="%d{yy-MM-dd HH:mm:ss,SSS} %-5p %c(%F:%L) ## %m%n" />
|
||||
</layout>
|
||||
</appender>
|
||||
|
||||
<appender name="MAIL" class="org.apache.log4j.net.SMTPAppender">
|
||||
<param name="threshold" value="info" />
|
||||
<param name="BufferSize" value="512" />
|
||||
<param name="From" value="gcm365111@126.com" />
|
||||
<param name="SMTPHost" value="SMTP.126.com" />
|
||||
<param name="Subject" value="this is test" />
|
||||
<param name="SMTPUsername" value="gcm365111@126.com" />
|
||||
<param name="SMTPPassword" value="03023651gcm" />
|
||||
<param name="to" value="gcm3651@126.com" />
|
||||
<layout class="org.apache.log4j.PatternLayout">
|
||||
<param name="ConversionPattern" value="%d{yy-MM-dd HH:mm:ss,SSS} %-5p %c(%F:%L) ## %m%n" />
|
||||
</layout>
|
||||
</appender>
|
||||
|
||||
<appender name="file" class="org.apache.log4j.DailyRollingFileAppender">
|
||||
<param name="File" value="./log/info.log" />
|
||||
<layout class="org.apache.log4j.PatternLayout">
|
||||
<param name="ConversionPattern" value="%d{yy-MM-dd HH:mm:ss,SSS} %-5p %c(%F:%L) ## %m%n" />
|
||||
</layout>
|
||||
</appender>
|
||||
|
||||
|
||||
|
||||
<root>
|
||||
<level value="info" />
|
||||
<appender-ref ref="file" />
|
||||
</root>
|
||||
|
||||
</log4j:configuration>
|
|
@ -0,0 +1,18 @@
|
|||
package procedure_for_tag;
|
||||
|
||||
import org.springframework.context.ApplicationContext;
|
||||
import org.springframework.context.support.ClassPathXmlApplicationContext;
|
||||
|
||||
import com.mchange.v2.c3p0.ComboPooledDataSource;
|
||||
|
||||
public class ApplicationContextTest {
|
||||
|
||||
public static void main(String[] args) {
|
||||
// TODO Auto-generated method stub
|
||||
ApplicationContext applicationContext = new ClassPathXmlApplicationContext(
|
||||
"applicationContext.xml");
|
||||
ComboPooledDataSource cpds = (ComboPooledDataSource) applicationContext
|
||||
.getBean("dataSource");
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,20 @@
|
|||
package procedure_for_tag;
|
||||
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
public class Sl4jTest {
|
||||
//private final Log logger = LogFactory.getLog(getClass());
|
||||
private Logger logger = LoggerFactory.getLogger(getClass());
|
||||
|
||||
public void test(){
|
||||
logger.info("info2");
|
||||
logger.error("error2");
|
||||
}
|
||||
public static void main(String[] args) {
|
||||
// TODO Auto-generated method stub
|
||||
Sl4jTest st = new Sl4jTest();
|
||||
st.test();
|
||||
}
|
||||
|
||||
}
|
Loading…
Reference in New Issue