first commit

This commit is contained in:
zhanyun 2015-11-30 15:34:13 +08:00
commit 494499eadb
57 changed files with 281267 additions and 0 deletions

28
.classpath Normal file
View File

@ -0,0 +1,28 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Eclipse build path for the project: Maven-derived Java source folders, two
  extra source folders, the JRE container and the m2e dependency container.
-->
<classpath>
<classpathentry kind="src" output="target/classes" path="src/main/java">
<attributes>
<attribute name="optional" value="true"/>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="src" output="target/test-classes" path="src/test/java">
<attributes>
<attribute name="optional" value="true"/>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="src" path="src/main/assembly"/>
<!-- NOTE(review): this path is src/main/resource (singular) while pom.xml declares src/main/resources; confirm which directory exists. -->
<classpathentry kind="src" path="src/main/resource"/>
<!-- NOTE(review): the JRE container is J2SE-1.5 while pom.xml compiles with source/target 1.7; confirm the intended level. -->
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/J2SE-1.5">
<attributes>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="con" path="org.eclipse.m2e.MAVEN2_CLASSPATH_CONTAINER">
<attributes>
<attribute name="maven.pomderived" value="true"/>
</attributes>
</classpathentry>
<classpathentry kind="output" path="target/classes"/>
</classpath>

2
.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
/target/
/log/*

23
.project Normal file
View File

@ -0,0 +1,23 @@
<?xml version="1.0" encoding="UTF-8"?>
<projectDescription>
<name>project_manager</name>
<comment></comment>
<projects>
</projects>
<buildSpec>
<buildCommand>
<name>org.eclipse.jdt.core.javabuilder</name>
<arguments>
</arguments>
</buildCommand>
<buildCommand>
<name>org.eclipse.m2e.core.maven2Builder</name>
<arguments>
</arguments>
</buildCommand>
</buildSpec>
<natures>
<nature>org.eclipse.jdt.core.javanature</nature>
<nature>org.eclipse.m2e.core.maven2Nature</nature>
</natures>
</projectDescription>

View File

@ -0,0 +1,4 @@
eclipse.preferences.version=1
encoding//src/main/java=UTF-8
encoding//src/test/java=UTF-8
encoding/<project>=UTF-8

View File

@ -0,0 +1,5 @@
eclipse.preferences.version=1
org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.5
org.eclipse.jdt.core.compiler.compliance=1.5
org.eclipse.jdt.core.compiler.problem.forbiddenReference=warning
org.eclipse.jdt.core.compiler.source=1.5

View File

@ -0,0 +1,4 @@
activeProfiles=
eclipse.preferences.version=1
resolveWorkspaceProjects=true
version=1

View File

@ -0,0 +1,20 @@
#!/bin/bash
# Launches com.ossean.GatherProjects for the flow
# "apache_projects_to_gather_projects" as a background JVM.
# Every path is relative, so run this from the project root.
# Kept POSIX-sh compatible (no arrays): start_all_flow.sh starts its sibling
# launchers with plain `sh`.

# Drop the config/dictionary files that the build copied into target/classes,
# presumably so the copies under ./bin/resources are the ones loaded (TODO confirm).
# "-exec rm -f {} +" handles file names containing whitespace, which the
# previous "find | xargs rm" pipeline did not.
find ./target/classes \( -name "*.properties" -o -name "*.xml" -o -name "*.dic" \) -exec rm -f {} +

# Classpath order: resources first, then the assembled dependency jars, then the
# compiled classes, then whatever CLASSPATH the caller already had.
# The '::' is kept from the original concatenation so the resulting value is
# unchanged; building it in one assignment avoids picking up a stray $tmp from
# the environment.
CLASSPATH="./bin/resources:./target/EDBD-0.0.1-SNAPSHOT-jar-with-dependencies-without-resources/*:./target/classes::$CLASSPATH"
echo "$CLASSPATH"

JVM_ARGS="-Xmn98m -Xmx512m -Xms512m -XX:NewRatio=4 -XX:SurvivorRatio=4 -XX:MaxTenuringThreshold=2"
#ulimit -n 400000

# The log directory is git-ignored and may not exist on a fresh checkout; without
# it the redirection below fails and the JVM is never started.
mkdir -p log

# $JVM_ARGS is intentionally unquoted so it splits into separate options.
# "$CLASSPATH" is quoted so the '*' wildcard reaches the JVM untouched by the shell.
java $JVM_ARGS -classpath "$CLASSPATH" com.ossean.GatherProjects apache_projects_to_gather_projects >>log/apache_projects_to_gather_projects.log 2>&1 &

View File

@ -0,0 +1,20 @@
#!/bin/bash
# Launches com.ossean.GatherProjects for the flow
# "freecode_projects_to_gather_projects" as a background JVM.
# Every path is relative, so run this from the project root.
# Kept POSIX-sh compatible (no arrays): start_all_flow.sh starts this kind of
# launcher with plain `sh`.

# Drop the config/dictionary files that the build copied into target/classes,
# presumably so the copies under ./bin/resources are the ones loaded (TODO confirm).
# "-exec rm -f {} +" handles file names containing whitespace, which the
# previous "find | xargs rm" pipeline did not.
find ./target/classes \( -name "*.properties" -o -name "*.xml" -o -name "*.dic" \) -exec rm -f {} +

# Classpath order: resources first, then the assembled dependency jars, then the
# compiled classes, then whatever CLASSPATH the caller already had.
# The '::' is kept from the original concatenation so the resulting value is
# unchanged; building it in one assignment avoids picking up a stray $tmp from
# the environment.
CLASSPATH="./bin/resources:./target/EDBD-0.0.1-SNAPSHOT-jar-with-dependencies-without-resources/*:./target/classes::$CLASSPATH"
echo "$CLASSPATH"

JVM_ARGS="-Xmn98m -Xmx512m -Xms512m -XX:NewRatio=4 -XX:SurvivorRatio=4 -XX:MaxTenuringThreshold=2"
#ulimit -n 400000

# The log directory is git-ignored and may not exist on a fresh checkout; without
# it the redirection below fails and the JVM is never started.
mkdir -p log

# $JVM_ARGS is intentionally unquoted so it splits into separate options.
# "$CLASSPATH" is quoted so the '*' wildcard reaches the JVM untouched by the shell.
java $JVM_ARGS -classpath "$CLASSPATH" com.ossean.GatherProjects freecode_projects_to_gather_projects >>log/freecode_projects_to_gather_projects.log 2>&1 &

View File

@ -0,0 +1,20 @@
#!/bin/bash
# Launches com.ossean.GatherProjects for the flow
# "openhub_projects_to_gather_projects" as a background JVM.
# Every path is relative, so run this from the project root.
# Kept POSIX-sh compatible (no arrays): start_all_flow.sh starts this kind of
# launcher with plain `sh`.

# Drop the config/dictionary files that the build copied into target/classes,
# presumably so the copies under ./bin/resources are the ones loaded (TODO confirm).
# "-exec rm -f {} +" handles file names containing whitespace, which the
# previous "find | xargs rm" pipeline did not.
find ./target/classes \( -name "*.properties" -o -name "*.xml" -o -name "*.dic" \) -exec rm -f {} +

# Classpath order: resources first, then the assembled dependency jars, then the
# compiled classes, then whatever CLASSPATH the caller already had.
# The '::' is kept from the original concatenation so the resulting value is
# unchanged; building it in one assignment avoids picking up a stray $tmp from
# the environment.
CLASSPATH="./bin/resources:./target/EDBD-0.0.1-SNAPSHOT-jar-with-dependencies-without-resources/*:./target/classes::$CLASSPATH"
echo "$CLASSPATH"

JVM_ARGS="-Xmn98m -Xmx512m -Xms512m -XX:NewRatio=4 -XX:SurvivorRatio=4 -XX:MaxTenuringThreshold=2"
#ulimit -n 400000

# The log directory is git-ignored and may not exist on a fresh checkout; without
# it the redirection below fails and the JVM is never started.
mkdir -p log

# $JVM_ARGS is intentionally unquoted so it splits into separate options.
# "$CLASSPATH" is quoted so the '*' wildcard reaches the JVM untouched by the shell.
java $JVM_ARGS -classpath "$CLASSPATH" com.ossean.GatherProjects openhub_projects_to_gather_projects >>log/openhub_projects_to_gather_projects.log 2>&1 &

View File

@ -0,0 +1,20 @@
#!/bin/bash
# Launches com.ossean.GatherProjects for the flow
# "oschina_projects_to_gather_projects" as a background JVM.
# Every path is relative, so run this from the project root.
# Kept POSIX-sh compatible (no arrays): start_all_flow.sh starts this kind of
# launcher with plain `sh`.

# Drop the config/dictionary files that the build copied into target/classes,
# presumably so the copies under ./bin/resources are the ones loaded (TODO confirm).
# "-exec rm -f {} +" handles file names containing whitespace, which the
# previous "find | xargs rm" pipeline did not.
find ./target/classes \( -name "*.properties" -o -name "*.xml" -o -name "*.dic" \) -exec rm -f {} +

# Classpath order: resources first, then the assembled dependency jars, then the
# compiled classes, then whatever CLASSPATH the caller already had.
# The '::' is kept from the original concatenation so the resulting value is
# unchanged; building it in one assignment avoids picking up a stray $tmp from
# the environment.
CLASSPATH="./bin/resources:./target/EDBD-0.0.1-SNAPSHOT-jar-with-dependencies-without-resources/*:./target/classes::$CLASSPATH"
echo "$CLASSPATH"

JVM_ARGS="-Xmn98m -Xmx512m -Xms512m -XX:NewRatio=4 -XX:SurvivorRatio=4 -XX:MaxTenuringThreshold=2"
#ulimit -n 400000

# The log directory is git-ignored and may not exist on a fresh checkout; without
# it the redirection below fails and the JVM is never started.
mkdir -p log

# $JVM_ARGS is intentionally unquoted so it splits into separate options.
# "$CLASSPATH" is quoted so the '*' wildcard reaches the JVM untouched by the shell.
java $JVM_ARGS -classpath "$CLASSPATH" com.ossean.GatherProjects oschina_projects_to_gather_projects >>log/oschina_projects_to_gather_projects.log 2>&1 &

View File

@ -0,0 +1,11 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE properties SYSTEM "http://java.sun.com/dtd/properties.dtd">
<properties>
<comment>IK Analyzer 扩展配置</comment>
<!-- Users can configure their own extension dictionaries here (semicolon-separated file names). -->
<entry key="ext_dict">ext.dic;</entry>
<!-- Users can configure their own extension stop-word dictionaries here (semicolon-separated file names). -->
<entry key="ext_stopwords">stopword_1.dic;code_keywords.dic;stopword.dic</entry>
</properties>

2
bin/resources/README.txt Normal file
View File

@ -0,0 +1,2 @@
配置文件和main2012, quantifier要在ik文件夹下
停用词要在src文件夹下

View File

@ -0,0 +1,16 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Spring context that switches on annotation-based configuration and scans the
  com.ossean package for components.
  NOTE(review): the mvc namespace and its schema location are declared but no
  mvc element is used in this file.
-->
<beans xmlns="http://www.springframework.org/schema/beans"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:context="http://www.springframework.org/schema/context"
xmlns:mvc="http://www.springframework.org/schema/mvc"
xsi:schemaLocation="http://www.springframework.org/schema/mvc
http://www.springframework.org/schema/mvc/spring-mvc-3.0.xsd
http://www.springframework.org/schema/beans
http://www.springframework.org/schema/beans/spring-beans-3.0.xsd
http://www.springframework.org/schema/context
http://www.springframework.org/schema/context/spring-context-3.0.xsd">
<!-- Activates processing of annotations such as @Autowired and @Resource on registered beans. -->
<context:annotation-config/>
<!-- Registers every annotated component found under com.ossean. -->
<context:component-scan base-package="com.ossean"/>
</beans>

View File

@ -0,0 +1,61 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Data-access wiring: two DBCP connection pools, each with its own MyBatis
  SqlSessionFactory and mapper scanner, plus a single transaction manager.
  NOTE(review): host addresses, user names and passwords are hard-coded below
  and are therefore committed with the source; consider externalising them.
-->
<beans xmlns="http://www.springframework.org/schema/beans"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns:context="http://www.springframework.org/schema/context"
xmlns:aop="http://www.springframework.org/schema/aop"
xmlns:p="http://www.springframework.org/schema/p"
xmlns:tx="http://www.springframework.org/schema/tx"
xsi:schemaLocation="http://www.springframework.org/schema/beans
http://www.springframework.org/schema/beans/spring-beans-3.0.xsd
http://www.springframework.org/schema/context
http://www.springframework.org/schema/context/spring-context-3.0.xsd
http://www.springframework.org/schema/tx
http://www.springframework.org/schema/tx/spring-tx-3.0.xsd
http://www.springframework.org/schema/aop
http://www.springframework.org/schema/aop/spring-aop-3.0.xsd">
<!-- Source database (extract_result); its mappers are scanned from com.ossean.databaseSource. -->
<bean id="dataSourceOne" class="org.apache.commons.dbcp.BasicDataSource"
destroy-method="close">
<property name="driverClassName" value="com.mysql.jdbc.Driver" />
<property name="url"
value="jdbc:mysql://192.168.80.104:3306/extract_result?characterEncoding=UTF-8&amp;zeroDateTimeBehavior=convertToNull&amp;autoReconnect=true" />
<property name="username" value="influx" />
<property name="password" value="influx1234" />
<!-- Each connection is checked with "SELECT 1" when it is borrowed from the pool. -->
<property name="validationQuery" value="SELECT 1" />
<property name="testOnBorrow" value="true"/>
</bean>
<bean id="sqlSessionFactoryOne" class="org.mybatis.spring.SqlSessionFactoryBean">
<property name="dataSource" ref="dataSourceOne" />
</bean>
<bean class="org.mybatis.spring.mapper.MapperScannerConfigurer">
<property name="basePackage" value="com.ossean.databaseSource" />
<property name="sqlSessionFactory" ref="sqlSessionFactoryOne"></property>
</bean>
<!-- Destination database (ossean_production); its mappers are scanned from com.ossean.databaseDest. -->
<bean id="dataSourceTwo" class="org.apache.commons.dbcp.BasicDataSource"
destroy-method="close">
<property name="driverClassName" value="com.mysql.jdbc.Driver" />
<property name="url"
value="jdbc:mysql://192.168.80.130:3306/ossean_production?useUnicode=true&amp;characterEncoding=UTF-8&amp;zeroDateTimeBehavior=convertToNull&amp;autoReconnect=true" />
<property name="username" value="trustie" />
<property name="password" value="1234" />
<!-- Each connection is checked with "SELECT 1" when it is borrowed from the pool. -->
<property name="validationQuery" value="SELECT 1" />
<property name="testOnBorrow" value="true"/>
</bean>
<bean id="sqlSessionFactoryTwo" class="org.mybatis.spring.SqlSessionFactoryBean">
<property name="dataSource" ref="dataSourceTwo" />
</bean>
<bean class="org.mybatis.spring.mapper.MapperScannerConfigurer">
<property name="basePackage" value="com.ossean.databaseDest" />
<property name="sqlSessionFactory" ref="sqlSessionFactoryTwo"></property>
</bean>
<!--
  NOTE(review): the only transaction manager is bound to dataSourceOne, so
  annotation-driven transactions do not cover dataSourceTwo (the destination
  database); confirm that this is intended.
-->
<tx:annotation-driven transaction-manager="transactionManager"/>
<bean id="transactionManager" class="org.springframework.jdbc.datasource.DataSourceTransactionManager">
<property name="dataSource" ref="dataSourceOne" />
</bean>
</beans>

View File

@ -0,0 +1,697 @@
private
protected
public
abstract
class
extends
final
implements
interface
native
new
static
strictfp
synchronized
transient
volatile
break
continue
return
do
while
if
else
for
instanceof
switch
case
default
catch
finally
throw
throws
try
import
package
boolean
byte
char
double
float
int
long
short
null
true
false
super
this
void
form
do
sourceforge
com
cn
www
http
js
script
a
test
error
exception
about
website
able
abstract
console
sql
windows
exe
txt
doc
xls
local
net
web
server
from
using
error
can
date
file
xml
can
system
url
value
create
text
set
get
list
table
select
distinct
object
open
close
clear
all
time
have
org
main
start
end
version
private
public
index
api
method
source
root
content
write
read
view
one
page
run
log
win
bin
first
next
src
app
request
println
namespace
printf
button
title
local
define
lib
make
files
help
article
bool
boolean
config
load
args
date
thread
cpu
more
document
util
info
home
style
body
print
hello
world
find
left
std
debug
etc
like
top
now
map
context
other
post
format
client
encoding
session
program
datebase
control
language
base
process
want
values
used
status
project
color
array
loaclhost
please
click
integer
example
archive
response
command
event
param
what
core
build
link
display
copy
stdio
release
mode
after
check
cpp
c
port
target
should
library
lib
tcp
udp
state
sum
free
last
connect
configuration
none
download
software
buffer
query
bit
temp
word
block
two
send
device
layout
sdk
cache
alert
see
change
work
kernel
filter
handel
save
tools
min
count
header
level
framework
sys
email
push
pull
heap
stack
user
must
demo
just
serach
res
tmp
down
option
img
network
model
stop
problem
position
node
space
login
font
todo
background
resource
mac
bytes
ios
empty
note
self
tag
column
studio
red
done
baidu
sleep
convert
global
field
reference
way
settings
simple
ctrl
wait
meta
edit
runtime
store
equals
services
bean
ftp
exec
non
enter
lock
admin
configure
png
jpg
abc
loop
vector
setup
mail
flag
machine
sample
share
image
ref
join
where
parameter
users
contain
know
template
class
math
pdf
parse
timeout
cat
through
basic
invoke
resources
media
uri
paltform
err
sudo
range
menu
mapping
tables
master
memset
reset
environment
pop
commit
report
task
active
domain
good
step
setting
day
hash
move
collections
phone
engine
callback
datetime
objects
mobile
profile
always
storage
alt
trim
applications
via
design
nothing
makefile
clean
small
solution
custom
people
projects
family
attributes
enum
own
works
messages
low
signal
browser
give
aaa
event
assert
look
company
person
either
case
desktop
touch
book
unit
docs
params
dump
pool
feature
hashmap
seconds
owner
goto
keys
editor
env
month
year
loading
changes
black
fetch
guide
enterprise
multi
foundation
reflect
unique
upload
play
idea
exist
elements
account
numbers
examples
plain
solid
game
hide
apps
symbol
layer
least
weight
beta
jpeg
attr
refresh
apply
focus
fun
cfg
threads
receive
recieved
easy
hard
mode
logs
password
passwd
packages
ignore
scripts
cell
later
plus
his
pub
going
logging
channel
think
serial
things
loader
player
libs
team
backup
conector
side
utf8
speed
doing
price
timestamp
repository
init
HelloWorld
tesing
scan
components
speed
unlock
products
mid
said
token
optional
today
rest
pack
coding
contain
points
days
safe
typeof
community
others
manual
sets
closed
fine
groups
specify
sources
workder
problems
perform
servers
articles
skip
blocks
connections
related
notify
onload
known
assembly
useful
monitor
thing
forum
come
notification
rules
developers
cross
notice
soft
progress
fork
hosts
logger
role
everything
future
references
jni
locale
follow
words
updates
necessary
writer
mouse
sign
repoter
containing
route
analysis
four
comments
programs
maps
turn
versions
drive
parser
track
money
utils
reload
decimal
resume
behavior
hand
board
light
regex
terminal
reduce
requests
fff
0xfffff
strong
edge
schedule
pair
swf
tom
dependency
rule
escape
hook
temporary
issues
exchange
review
various
life
friend
face
complex
readme
prop
expert
extend
often
automatic
capture
difference
scheme
develop
together
steps
reply
beginning
understand
identified
wall
along
market
minute
hour
university
plan
templates
codes
ccc
diff
ppt
study
abcd
lost
choice
explain
likely
guid
taken
period
FAQ
symbols
thus
tips
compute
school
languages
describe
resolution
deal
detect
yellow
hope
visit
xyz
iii
dog
jump
knowledge
publish
inserted
levels
chip
preference
done
adjust
earth
expressions
house
central
rich
chat
sessions
deep
when
where
how
who
仅供参考

5
bin/resources/ext.dic Normal file
View File

@ -0,0 +1,5 @@
sql server
web server
http server
linux kernel
face++

View File

@ -0,0 +1,17 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE properties SYSTEM "http://java.sun.com/dtd/properties.dtd">
<!--
  Flow settings for copying rows from freecode_projects into gather_projects.
  sourceFields and targetFields each hold 22 comma-separated entries and appear
  to pair up by position (for example projectTitle to name, implementation to
  language, the literal "FreeCode" to source); TODO confirm against the code
  that reads this file.
-->
<properties>
<comment>TableFlow</comment>
<entry key="pointerTableName">edd_pointers</entry>
<entry key="migrationTask">gather_tasks</entry>
<entry key="sourceTableName">freecode_projects</entry>
<entry key="targetTableName">gather_projects</entry>
<entry key="sourceFields">deal_name,projectTitle,description,last_update_time,registered_time,followers_num,url,MD5(url),null,implementation,tags,null,null,null,null,visit_num,category,crawled_time,history,"FreeCode",license,operateSystem</entry>
<!-- <entry key="targetFields">url,crawled_time,tags,license,name,description,language,platform,source,registered_time,urlMD5</entry> -->
<entry key="targetFields">deal_name,name,description,last_update_time,registered_time,followers_num,url,urlMD5,pageMD5,language,tags,contributors_num,monitor,status,download_num,visit_num,category,crawled_time,history,source,license,platform</entry>
<!-- Presumably milliseconds (3600000 would be one hour); verify against the reader. -->
<entry key="waitDataTime">3600000</entry>
<!-- Extra SQL condition; it starts with AND, so it is presumably appended to an existing WHERE clause. -->
<entry key="andWhere"> AND projectTitle is not null AND description IS NOT NULL and projectTitle != '' and description != ''</entry>
<entry key="idsBegin">1</entry>
<entry key="idsEnd">500000</entry>
<entry key="idsIncrement">500</entry>
</properties>

40
bin/resources/log4j.xml Normal file
View File

@ -0,0 +1,40 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE log4j:configuration SYSTEM "log4j.dtd">
<!--
  log4j 1.x configuration: a console appender, an SMTP appender and a daily
  rolling file appender. The root logger logs at level info to the file and
  console appenders.
-->
<log4j:configuration xmlns:log4j="http://jakarta.apache.org/log4j/">
<!-- Console output; the pattern prints timestamp, level, logger, file:line and message. -->
<appender name="stdout" class="org.apache.log4j.ConsoleAppender">
<layout class="org.apache.log4j.PatternLayout">
<param name="ConversionPattern" value="%d{yy-MM-dd HH:mm:ss,SSS} %-5p %c(%F:%L) ## %m%n" />
</layout>
</appender>
<!--
  NOTE(review): this appender is not referenced by the root logger below, and it
  embeds an SMTP account name and password in the committed file; consider
  removing it or moving the credentials out of version control.
-->
<appender name="MAIL" class="org.apache.log4j.net.SMTPAppender">
<param name="threshold" value="info" />
<param name="BufferSize" value="512" />
<param name="From" value="gcm365111@126.com" />
<param name="SMTPHost" value="SMTP.126.com" />
<param name="Subject" value="this is test" />
<param name="SMTPUsername" value="gcm365111@126.com" />
<param name="SMTPPassword" value="03023651gcm" />
<param name="to" value="gcm3651@126.com" />
<layout class="org.apache.log4j.PatternLayout">
<param name="ConversionPattern" value="%d{yy-MM-dd HH:mm:ss,SSS} %-5p %c(%F:%L) ## %m%n" />
</layout>
</appender>
<!-- File output to ./log/info.log, relative to the working directory of the JVM. -->
<appender name="file" class="org.apache.log4j.DailyRollingFileAppender">
<param name="File" value="./log/info.log" />
<layout class="org.apache.log4j.PatternLayout">
<param name="ConversionPattern" value="%d{yy-MM-dd HH:mm:ss,SSS} %-5p %c(%F:%L) ## %m%n" />
</layout>
</appender>
<root>
<level value="info" />
<appender-ref ref="file" />
<appender-ref ref="stdout" />
</root>
</log4j:configuration>

275713
bin/resources/main2012.dic Normal file

File diff suppressed because it is too large Load Diff

17
bin/resources/myBatis.xml Normal file
View File

@ -0,0 +1,17 @@
<?xml version="1.0" encoding="UTF-8" ?>
<!DOCTYPE configuration
PUBLIC "-//mybatis.org//DTD Config 3.0//EN"
"http://mybatis.org/dtd/mybatis-3-config.dtd">
<!--
  Stand-alone MyBatis configuration with one "development" environment using
  JDBC transactions and a pooled data source.
  NOTE(review): the dataSource element declares no driver, url, username or
  password properties, and the Spring context builds its SqlSessionFactoryBean
  instances without pointing at this file; confirm whether it is still used.
-->
<configuration>
<environments default="development">
<environment id="development">
<transactionManager type="JDBC"/>
<dataSource type="POOLED">
</dataSource>
</environment>
</environments>
</configuration>

View File

@ -0,0 +1,16 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE properties SYSTEM "http://java.sun.com/dtd/properties.dtd">
<!--
  Flow settings for copying rows from openhub_project into gather_projects.
  sourceFields and targetFields each hold 22 comma-separated entries and appear
  to pair up by position (for example firstCommitTime to registered_time,
  contributorNum to contributors_num, the literal "OpenHub" to source); TODO
  confirm against the code that reads this file.
-->
<properties>
<comment>TableFlow</comment>
<entry key="pointerTableName">edd_pointers</entry>
<entry key="migrationTask">gather_tasks</entry>
<entry key="sourceTableName">openhub_project</entry>
<entry key="targetTableName">gather_projects</entry>
<entry key="sourceFields">null,name,description,last_update_time,firstCommitTime,followers_num,Url,MD5(Url),null,null,tags,contributorNum,null,null,null,null,null,null,null,"OpenHub",license,null</entry>
<entry key="targetFields">deal_name,name,description,last_update_time,registered_time,followers_num,url,urlMD5,pageMD5,language,tags,contributors_num,monitor,status,download_num,visit_num,category,crawled_time,history,source,license,platform</entry>
<!-- Presumably milliseconds (3600000 would be one hour); verify against the reader. -->
<entry key="waitDataTime">3600000</entry>
<!-- Extra SQL condition; it starts with "and", so it is presumably appended to an existing WHERE clause. -->
<entry key="andWhere"> and name is not null and description is not null and name != '' and description != '' and codeLocation not like '%add a code location%' </entry>
<entry key="idsBegin">1</entry>
<entry key="idsEnd">500000</entry>
<entry key="idsIncrement">500</entry>
</properties>

View File

@ -0,0 +1,17 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE properties SYSTEM "http://java.sun.com/dtd/properties.dtd">
<!--
  Flow settings for copying rows from oschina_project into gather_projects.
  sourceFields and targetFields each hold 22 comma-separated entries and appear
  to pair up by position (for example projectTitle to name, IncludedTime to
  registered_time, projectOS to platform); TODO confirm against the code that
  reads this file.
-->
<properties>
<comment>TableFlow</comment>
<entry key="pointerTableName">edd_pointers</entry>
<entry key="migrationTask">gather_tasks</entry>
<entry key="sourceTableName">oschina_project</entry>
<entry key="targetTableName">gather_projects</entry>
<entry key="sourceFields">deal_name,projectTitle,description,last_update_time,IncludedTime,followers_num,url,MD5(Url),pageMD5,language,tags,contributors_num,monitor,status,download_num,visit_num,category,crawled_time,history,"OSChina",license,projectOS</entry>
<!-- <entry key="targetFields">url,crawled_time,tags,license,name,description,language,platform,source,registered_time,urlMD5</entry> -->
<entry key="targetFields">deal_name,name,description,last_update_time,registered_time,followers_num,url,urlMD5,pageMD5,language,tags,contributors_num,monitor,status,download_num,visit_num,category,crawled_time,history,source,license,platform</entry>
<!-- Presumably milliseconds (3600000 would be one hour); verify against the reader. -->
<entry key="waitDataTime">3600000</entry>
<!-- Extra SQL condition; it starts with "and", so it is presumably appended to an existing WHERE clause. -->
<entry key="andWhere"> and projectTitle is not null and description is not null and projectTitle != '' and description != '' </entry>
<entry key="idsBegin">1</entry>
<entry key="idsEnd">500000</entry>
<entry key="idsIncrement">500</entry>
</properties>

View File

@ -0,0 +1,316 @@
丈
世纪
位数
像素
克拉
公亩
公克
公分
公升
公尺
公担
公斤
公里
公顷
分钟
分米
加仑
千克
千米
厘米
周年
小时
平方
平方公尺
平方公里
平方分米
平方厘米
平方码
平方米
平方英寸
平方英尺
平方英里
平米
年代
年级
月份
毫升
毫米
毫克
海里
点钟
盎司
秒钟
立方公尺
立方分米
立方厘米
立方码
立方米
立方英寸
立方英尺
英亩
英寸
英尺
英里
阶段

View File

@ -0,0 +1,16 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE properties SYSTEM "http://java.sun.com/dtd/properties.dtd">
<!--
  Flow settings for copying rows from sourceforge_project into gather_projects.
  sourceFields and targetFields each hold 22 comma-separated entries and appear
  to pair up by position (for example programmingLanguage to language, the
  literal "SourceForge" to source); TODO confirm against the code that reads
  this file.
-->
<properties>
<comment>TableFlow</comment>
<entry key="pointerTableName">edd_pointers</entry>
<entry key="migrationTask">gather_tasks</entry>
<entry key="sourceTableName">sourceforge_project</entry>
<entry key="targetTableName">gather_projects</entry>
<entry key="sourceFields">null,name,description,last_update_time,registered_time,null,url,MD5(url),pageMd5,programmingLanguage,null,null,null,null,download_num,null,category,null,history,"SourceForge",license,platform</entry>
<entry key="targetFields">deal_name,name,description,last_update_time,registered_time,followers_num,url,urlMD5,pageMD5,language,tags,contributors_num,monitor,status,download_num,visit_num,category,crawled_time,history,source,license,platform</entry>
<!-- Presumably milliseconds (3600000 would be one hour); verify against the reader. -->
<entry key="waitDataTime">3600000</entry>
<!-- Extra SQL condition; it starts with "and", so it is presumably appended to an existing WHERE clause. -->
<entry key="andWhere"> and name is not null and description is not null and name != '' and description != '' and (download_num>0 or stars>0) </entry>
<entry key="idsBegin">1</entry>
<entry key="idsEnd">500000</entry>
<entry key="idsIncrement">500</entry>
</properties>

View File

@ -0,0 +1,36 @@
a
an
and
are
as
at
be
but
by
for
if
in
into
is
it
no
not
of
on
or
such
that
the
their
then
there
these
they
this
to
was
will
with
一个

2340
bin/resources/stopword_1.dic Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,20 @@
#!/bin/bash
# Launches com.ossean.GatherProjects for the flow
# "sourceforge_projects_to_gather_projects" as a background JVM.
# Every path is relative, so run this from the project root.
# Kept POSIX-sh compatible (no arrays): start_all_flow.sh starts this kind of
# launcher with plain `sh`.

# Drop the config/dictionary files that the build copied into target/classes,
# presumably so the copies under ./bin/resources are the ones loaded (TODO confirm).
# "-exec rm -f {} +" handles file names containing whitespace, which the
# previous "find | xargs rm" pipeline did not.
find ./target/classes \( -name "*.properties" -o -name "*.xml" -o -name "*.dic" \) -exec rm -f {} +

# Classpath order: resources first, then the assembled dependency jars, then the
# compiled classes, then whatever CLASSPATH the caller already had.
# The '::' is kept from the original concatenation so the resulting value is
# unchanged; building it in one assignment avoids picking up a stray $tmp from
# the environment.
CLASSPATH="./bin/resources:./target/EDBD-0.0.1-SNAPSHOT-jar-with-dependencies-without-resources/*:./target/classes::$CLASSPATH"
echo "$CLASSPATH"

JVM_ARGS="-Xmn98m -Xmx512m -Xms512m -XX:NewRatio=4 -XX:SurvivorRatio=4 -XX:MaxTenuringThreshold=2"
#ulimit -n 400000

# The log directory is git-ignored and may not exist on a fresh checkout; without
# it the redirection below fails and the JVM is never started.
mkdir -p log

# $JVM_ARGS is intentionally unquoted so it splits into separate options.
# "$CLASSPATH" is quoted so the '*' wildcard reaches the JVM untouched by the shell.
java $JVM_ARGS -classpath "$CLASSPATH" com.ossean.GatherProjects sourceforge_projects_to_gather_projects >>log/sourceforge_projects_to_gather_projects.log 2>&1 &

7
bin/start_all_flow.sh Normal file
View File

@ -0,0 +1,7 @@
#!/bin/bash
# Starts the openhub, sourceforge, freecode and oschina gather flows, in that
# order, by running each flow's launcher script with sh.
# Paths are relative, so run this from the project root.
for flow in openhub sourceforge freecode oschina; do
  sh "bin/${flow}_projects_to_gather_projects.sh"
done

View File

@ -0,0 +1,20 @@
#!/bin/bash
# Launches com.ossean.MergeProjects as a background JVM.
# Every path is relative, so run this from the project root.
# Kept POSIX-sh compatible (no arrays) in case the script is run with `sh`.

# Drop the config/dictionary files that the build copied into target/classes,
# presumably so the copies under ./bin/resources are the ones loaded (TODO confirm).
# "-exec rm -f {} +" handles file names containing whitespace, which the
# previous "find | xargs rm" pipeline did not.
find ./target/classes \( -name "*.properties" -o -name "*.xml" -o -name "*.dic" \) -exec rm -f {} +

# Classpath order: resources first, then the assembled dependency jars, then the
# compiled classes, then whatever CLASSPATH the caller already had.
# The '::' is kept from the original concatenation so the resulting value is
# unchanged; building it in one assignment avoids picking up a stray $tmp from
# the environment.
CLASSPATH="./bin/resources:./target/EDBD-0.0.1-SNAPSHOT-jar-with-dependencies-without-resources/*:./target/classes::$CLASSPATH"
echo "$CLASSPATH"

JVM_ARGS="-Xmn98m -Xmx512m -Xms512m -XX:NewRatio=4 -XX:SurvivorRatio=4 -XX:MaxTenuringThreshold=2"
#ulimit -n 400000

# The log directory is git-ignored and may not exist on a fresh checkout; without
# it the redirection below fails and the JVM is never started.
mkdir -p log

# $JVM_ARGS is intentionally unquoted so it splits into separate options.
# "$CLASSPATH" is quoted so the '*' wildcard reaches the JVM untouched by the shell.
java $JVM_ARGS -classpath "$CLASSPATH" com.ossean.MergeProjects >>log/merge_projects.log 2>&1 &

View File

@ -0,0 +1,20 @@
#!/bin/bash
# Launches com.ossean.TransferProjects as a background JVM.
# Every path is relative, so run this from the project root.
# Kept POSIX-sh compatible (no arrays) in case the script is run with `sh`.

# Drop the config/dictionary files that the build copied into target/classes,
# presumably so the copies under ./bin/resources are the ones loaded (TODO confirm).
# "-exec rm -f {} +" handles file names containing whitespace, which the
# previous "find | xargs rm" pipeline did not.
find ./target/classes \( -name "*.properties" -o -name "*.xml" -o -name "*.dic" \) -exec rm -f {} +

# Classpath order: resources first, then the assembled dependency jars, then the
# compiled classes, then whatever CLASSPATH the caller already had.
# The '::' is kept from the original concatenation so the resulting value is
# unchanged; building it in one assignment avoids picking up a stray $tmp from
# the environment.
CLASSPATH="./bin/resources:./target/EDBD-0.0.1-SNAPSHOT-jar-with-dependencies-without-resources/*:./target/classes::$CLASSPATH"
echo "$CLASSPATH"

JVM_ARGS="-Xmn98m -Xmx512m -Xms512m -XX:NewRatio=4 -XX:SurvivorRatio=4 -XX:MaxTenuringThreshold=2"
#ulimit -n 400000

# The log directory is git-ignored and may not exist on a fresh checkout; without
# it the redirection below fails and the JVM is never started.
mkdir -p log

# $JVM_ARGS is intentionally unquoted so it splits into separate options.
# "$CLASSPATH" is quoted so the '*' wildcard reaches the JVM untouched by the shell.
java $JVM_ARGS -classpath "$CLASSPATH" com.ossean.TransferProjects >>log/transfer_projects.log 2>&1 &

98
pom.xml Normal file
View File

@ -0,0 +1,98 @@
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<!--
  Maven build for project_manager: compiles src/main/java at Java 1.7 and uses
  the assembly plugin with src/main/assembly/assembly.xml.
-->
<modelVersion>4.0.0</modelVersion>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>
<groupId>cn.edu.zhanyun</groupId>
<!--
  NOTE(review): the launch scripts put
  ./target/EDBD-0.0.1-SNAPSHOT-jar-with-dependencies-without-resources/* on the
  classpath, which does not match this artifactId; confirm the assembly output
  directory name the scripts should use.
-->
<artifactId>project_manager</artifactId>
<version>0.0.1-SNAPSHOT</version>
<name>project_manager</name>
<build>
<sourceDirectory>${basedir}/src/main/java</sourceDirectory>
<resources>
<resource>
<directory>${basedir}/src/main/resources</directory>
<!-- XML and properties files at the top of the resources directory are left out of the build output. -->
<excludes>
<exclude>*.xml</exclude>
<exclude>*.properties</exclude>
</excludes>
</resource>
</resources>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<!-- NOTE(review): no plugin version is pinned here; consider adding one for reproducible builds. -->
<configuration>
<source>1.7</source>
<target>1.7</target>
</configuration>
</plugin>
<plugin>
<artifactId>maven-assembly-plugin</artifactId>
<version>2.5.1</version>
<configuration>
<archive>
<manifest>
<!--
  NOTE(review): this main class is in package com.alan.myfunction, whereas the
  launch scripts start com.ossean.* classes; confirm that it is still correct.
-->
<mainClass>com.alan.myfunction.ProcedureTag</mainClass>
</manifest>
</archive>
<descriptors>
<descriptor>src/main/assembly/assembly.xml</descriptor>
</descriptors>
</configuration>
</plugin>
</plugins>
</build>
<dependencies>
<dependency>
<groupId>org.apache.commons</groupId>
<artifactId>commons-lang3</artifactId>
<version>3.1</version>
</dependency>
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
<version>5.1.30</version>
</dependency>
<dependency>
<groupId>org.springframework</groupId>
<artifactId>spring-context</artifactId>
<version>4.1.4.RELEASE</version>
</dependency>
<!-- NOTE(review): the Spring data-source configuration uses commons-dbcp; confirm whether c3p0 is still needed. -->
<dependency>
<groupId>c3p0</groupId>
<artifactId>c3p0</artifactId>
<version>0.9.1.2</version>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>3.8.1</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-log4j12</artifactId>
<version>1.7.7</version>
</dependency>
<dependency>
<groupId>org.mybatis</groupId>
<artifactId>mybatis</artifactId>
<version>3.1.1</version>
</dependency>
<dependency>
<groupId>commons-dbcp</groupId>
<artifactId>commons-dbcp</artifactId>
<version>1.3</version>
</dependency>
<dependency>
<groupId>org.mybatis</groupId>
<artifactId>mybatis-spring</artifactId>
<version>1.1.1</version>
</dependency>
</dependencies>
</project>

View File

@ -0,0 +1,25 @@
<assembly
xmlns="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.2"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/plugins/maven-assembly-plugin/assembly/1.1.2 http://maven.apache.org/xsd/assembly-1.1.2.xsd">
<!--
  Assembly descriptor that collects the project's dependencies into a plain
  directory (format "dir") without a base directory level.
-->
<!-- TODO: a jarjar format would be better -->
<id>jar-with-dependencies-without-resources</id>
<formats>
<format>dir</format>
</formats>
<includeBaseDirectory>false</includeBaseDirectory>
<dependencySets>
<!-- Runtime-scope dependencies, kept as packed jars; the project's own artifact is excluded. -->
<dependencySet>
<outputDirectory>/</outputDirectory>
<useProjectArtifact>false</useProjectArtifact>
<unpack>false</unpack>
<scope>runtime</scope>
</dependencySet>
<!-- System-scope dependencies, handled the same way. -->
<dependencySet>
<outputDirectory>/</outputDirectory>
<useProjectArtifact>false</useProjectArtifact>
<unpack>false</unpack>
<scope>system</scope>
</dependencySet>
</dependencySets>
</assembly>

View File

@ -0,0 +1,257 @@
package com.ossean.projectmanager;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import javax.annotation.Resource;
import org.apache.log4j.Logger;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.context.ApplicationContext;
import org.springframework.context.support.ClassPathXmlApplicationContext;
import org.springframework.stereotype.Component;
import com.ossean.projectmanager.hotwords.InsertHotwords;
import com.ossean.projectmanager.hotwords.MapSort;
import com.ossean.projectmanager.hotwords.StringHandler;
import com.ossean.projectmanager.hotwords.UpdateTagsAndTaggings;
import com.ossean.projectmanager.lasttabledao.OpenSourceProjectDao;
import com.ossean.projectmanager.lasttabledao.PointersDao;
import com.ossean.projectmanager.lasttabledao.RelativeMemoDao;
import com.ossean.projectmanager.lasttabledao.RelativeMemoToOpenSourceProjectDao;
import com.ossean.projectmanager.lasttabledao.TagDao;
import com.ossean.projectmanager.lasttabledao.TaggingsDao;
import com.ossean.projectmanager.model.Hotword;
import com.ossean.projectmanager.model.OpenSourceProject;
import com.ossean.projectmanager.model.RelativeMemo;
import com.ossean.projectmanager.model.RelativeMemoToOpenSourceProject;
@Component
public class HotwordsMain {
Logger logger = Logger.getLogger(this.getClass());
@Resource
private PointersDao pointersDao;
@Resource
private OpenSourceProjectDao ospDao;
@Resource
private RelativeMemoToOpenSourceProjectDao memoToOspDao;
@Resource
private TaggingsDao taggingsDao;
@Resource
private RelativeMemoDao memoDao;
@Resource
private TagDao tagsDao;
@Qualifier("updatetagsandtaggings")
@Autowired
private UpdateTagsAndTaggings updateClass;
@Qualifier("inserthotwords")
@Autowired
private InsertHotwords insertClass;
private static String sourceTableName = "open_source_projects";
private static String targetTableName = "hot_words";
private static int batchSize = 10;// 一次处理项目数量
public void start(){
while(true){
//读取断点位置
int startId = 1;
try {
startId = pointersDao.readPointer(sourceTableName, targetTableName);
} catch (Exception e){
//表示还没有该记录
pointersDao.insertPointer(sourceTableName, targetTableName);
}
//根据断点位置和批处理数量读取项目对象列表
List<OpenSourceProject> projects = ospDao.getProjectsByBatch(startId, batchSize);
if(projects.size() == 0){
//如果没有独处项目信息 表示没有需要处理的项目
logger.info("no projects! Sleep 3600s");
try {
Thread.sleep(3600*1000L);
continue;
} catch (InterruptedException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
for(OpenSourceProject project:projects){
//循环处理所有的项目
logger.info("匹配项目:" + project.getName() + " id:" + project.getId());
//读取relative_memo_to_open_source_projects表中当前项目对应的帖子
int prjId = project.getId();
List<RelativeMemoToOpenSourceProject> mtps = memoToOspDao.getHighWeightMatchResult(prjId, getTargetTable(prjId));
//定义存储标签name和标签权重的map 每个project一个map对象
Map<String, Float> tagWeightMap = new LinkedHashMap<String,Float>();
//定义存储标签name和标签词频的map 每个project一个map对象
Map<String, Integer> tagCountMap = new HashMap<String, Integer>();
//遍历每一个关联结果 即遍历每一个关联的帖子
for(RelativeMemoToOpenSourceProject item:mtps){
//读取帖子id
int memoId = item.getRelative_memo_id();
//获取RelativeMemo对象
RelativeMemo memo = memoDao.getById(memoId);
//将RelativeMemos对象中的tags属性转换成List<String>
List<String> memoTagList = new ArrayList<String>();
try{
memoTagList = StringHandler.changeTagsToTagList(memo.getTags());
}catch(Exception e){
//logger.info("没有找到对应的tags标签 memo_id:" + memoId);
continue;
}
//循环遍历每个tagName 将对应的权重计算出来 得到最终的map
for(String tagName:memoTagList){
if(tagWeightMap.containsKey(tagName)){
//表示map中已经存在该标签 需要将原来标签的权重取出并加上相应的taggings表中记录的匹配权重
tagWeightMap.put(tagName, tagWeightMap.get(tagName) + item.getMatch_weight());
tagCountMap.put(tagName, tagCountMap.get(tagName) + 1);
}else{
//表示map中没有该标签名
tagWeightMap.put(tagName, item.getMatch_weight());
tagCountMap.put(tagName,1);
}
}
}
//将map根据值从大到小排序
tagWeightMap = MapSort.sort(tagWeightMap);
//构造hotwords的List
List<Hotword> hotwordsList = new ArrayList<Hotword>();
Set<String> tagNames = tagWeightMap.keySet();
Iterator<String> iterator = tagNames.iterator();
int count_hotwords = 0;
while(iterator.hasNext() && count_hotwords < 20){
String tagName = iterator.next();//读取标签的名字
Hotword hotwords = new Hotword();
hotwords.setOsp_id(project.getId());
hotwords.setName(tagName);
hotwords.setWeight(tagWeightMap.get(tagName));
count_hotwords++;
hotwordsList.add(hotwords);
}
//读取该项目的tags属性并转换成List
List<String> projectTagList = StringHandler.changeTagsToTagList(project.getTags());
//记录需要修改disagree_num的标签名
List<String> updateDisagreeNumTagNameList = new ArrayList<String>();
if(projectTagList.size() < 10){
//表示如果项目标签数量小于10 向里面添加5填新的标签记录
//遍历项目关联的所有标签
Set<String> keys = tagWeightMap.keySet();
Iterator<String> it = keys.iterator();
int count = 0;//用于对新添加到项目中的标签进行计数
//最多增加5个新的标签 最多增加到10个
while(it.hasNext() && count < 5){
String tagName = it.next();//取出标签的名字
if(StringHandler.isTagExist(tagName, projectTagList)){
//表示当前关联到的标签在项目标签中存在 需要对taggings表中disagree_num增加该标签出现的词频
updateDisagreeNumTagNameList.add(tagName);//将需要修改的标签名先存储到列表中 在事物处理的时候统一处理
continue;
}else{
//表示当前关联到的标签在项目原标签中不存在
projectTagList.add(tagName);//将新标签添加到项目标签列表中
count++;//每添加一个新标签 count计数器加1
}
}
//如果count > 0 表示tags属性需要更新taggings需要插入记录 同时需要向热词表中插入20条数据
if(count > 0){
String tagsNew = StringHandler.changeTagListToTags(projectTagList);
logger.info("正在进行更新项目标签信息和taggings表信息操作请勿中断程序");
updateClass.update(project, projectTagList, tagsNew, count, sourceTableName, targetTableName, hotwordsList, updateDisagreeNumTagNameList, tagCountMap);
}
else{
//表示没有新增的标签 只需要向热词表中添加20条热词
insertClass.insert(hotwordsList, sourceTableName, targetTableName, project.getId() + 1, updateDisagreeNumTagNameList, tagCountMap, project);
logger.info("当前项目" + project.getName() + "没有标签更新操作只插入了热词和更新disagree_num标签");
}
}
else{
//表示项目已经存在10条记录 需要再添加3个标签 已经存在的标签不算在3个内
//遍历项目关联的所有标签
Set<String> keys = tagWeightMap.keySet();
Iterator<String> it = keys.iterator();
int count = 0;//用于对新添加到项目中的标签进行计数
//最多增加5个新的标签 最多增加到10个
while(it.hasNext() && count < 3){
String tagName = it.next();//取出标签的名字
if(StringHandler.isTagExist(tagName, projectTagList)){
//表示当前关联到的标签在项目标签中存在 需要对taggings表中disagree_num增加该标签出现的词频
updateDisagreeNumTagNameList.add(tagName);//将需要修改的标签名先存储到列表中 在事物处理的时候统一处理
continue;
}else{
//表示当前关联到的标签在项目原标签中不存在
projectTagList.add(tagName);//将新标签添加到项目标签列表中
count++;//每添加一个新标签 count计数器加1
}
}
//如果count > 0 表示tags属性需要更新taggings需要插入记录 同时需要向热词表中插入20条数据
if(count > 0){
String tagsNew = StringHandler.changeTagListToTags(projectTagList);
logger.info("正在进行更新项目标签信息和taggings表信息操作请勿中断程序");
updateClass.update(project, projectTagList, tagsNew, count, sourceTableName, targetTableName, hotwordsList, updateDisagreeNumTagNameList, tagCountMap);
}
else{
//表示没有新增的标签 只需要向热词表中添加20条热词
insertClass.insert(hotwordsList, sourceTableName, targetTableName, project.getId() + 1, updateDisagreeNumTagNameList, tagCountMap, project);
logger.info("当前项目" + project.getName() + "只插入了热词和更新disagree_num标签");
}
insertClass.insert(hotwordsList, sourceTableName, targetTableName, project.getId() + 1, updateDisagreeNumTagNameList, tagCountMap, project);
logger.info("当前项目" + project.getName() + "只插入了热词和更新disagree_num标签");
}
}
}
}
/**
 * Resolves the name of the match-result table that stores rows for a project id.
 * <p>
 * Ids below 10000 fall into eight hand-sized ranges (tables 1 to 8). Ids from
 * 10000 up to 309999 map to table {@code 7 + osp_id / 5000}. Ids of 310000 and
 * above all map to table 70. Ids below 500, including negative ones, map to table 1.
 *
 * @param osp_id id of the open source project
 * @return the table name, always prefixed with
 *         {@code relative_memo_to_open_source_projects_}
 */
public static String getTargetTable(int osp_id){
    final String prefix = "relative_memo_to_open_source_projects_";
    if (osp_id >= 310000) {
        // Everything past the computed range shares one table.
        return prefix + 70;
    }
    if (osp_id >= 10000) {
        // Fixed-width ranges of 5000 ids, starting at table 9 for id 10000.
        return prefix + (7 + osp_id / 5000);
    }
    // Exclusive upper bounds of tables 1..8; osp_id is below the last bound here,
    // so the scan always stops inside the array.
    final int[] upperBounds = {500, 1000, 1500, 2000, 3000, 5000, 7500, 10000};
    int tableNo = 1;
    while (osp_id >= upperBounds[tableNo - 1]) {
        tableNo++;
    }
    return prefix + tableNo;
}
/**
 * Command-line entry point: loads every {@code applicationContext*.xml} found on
 * the classpath, looks up the {@code HotwordsMain} bean and calls {@code start()}.
 *
 * @param args command-line arguments (not used)
 */
public static void main(String[] args){
    ApplicationContext context = new ClassPathXmlApplicationContext("classpath:/applicationContext*.xml");
    HotwordsMain hotwordsMain = context.getBean(HotwordsMain.class);
    hotwordsMain.start();
}
}

View File

@ -0,0 +1,26 @@
package com.ossean.projectmanager;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.context.ApplicationContext;
import org.springframework.context.support.ClassPathXmlApplicationContext;
import org.springframework.stereotype.Component;
import com.ossean.projectmanager.projectsfilter.ProjectsFilter;
/**
 * Launcher for the project filtering job; all work is delegated to the
 * {@link ProjectsFilter} bean.
 */
@Component
public class ProjectsFilterMain {

    /** Filter bean, injected by the bean name "projectsFilter". */
    @Autowired
    @Qualifier("projectsFilter")
    private ProjectsFilter projectsFilter;

    /** Runs the filter by calling {@code ProjectsFilter.filtratePrjs()}. */
    public void start(){
        projectsFilter.filtratePrjs();
    }

    /**
     * Command-line entry point: loads every {@code applicationContext*.xml} found
     * on the classpath, looks up this class as a bean and starts it.
     *
     * @param args command-line arguments (not used)
     */
    public static void main(String[] args){
        ApplicationContext context = new ClassPathXmlApplicationContext("classpath:/applicationContext*.xml");
        context.getBean(ProjectsFilterMain.class).start();
    }
}

View File

@ -0,0 +1,68 @@
package com.ossean.projectmanager.hotwords;
import java.util.List;
import java.util.Map;
import javax.annotation.Resource;
import org.springframework.stereotype.Component;
import org.springframework.transaction.annotation.Propagation;
import org.springframework.transaction.annotation.Transactional;
import com.ossean.projectmanager.lasttabledao.HotwordDao;
import com.ossean.projectmanager.lasttabledao.PointersDao;
import com.ossean.projectmanager.lasttabledao.TagDao;
import com.ossean.projectmanager.lasttabledao.TaggingsDao;
import com.ossean.projectmanager.model.Hotword;
import com.ossean.projectmanager.model.OpenSourceProject;
/**
 * Writes a project's hot words, bumps {@code disagree_num} for tags the project
 * already carries, and advances the processing pointer.
 */
@Component("inserthotwords")
public class InsertHotwords {

    /** Upper bound on the number of hot words inserted per call. */
    private static final int MAX_HOTWORDS = 20;

    @Resource
    private HotwordDao hotwordsDao;
    @Resource
    private PointersDao pointersDao;
    @Resource
    private TagDao tagsDao;
    @Resource
    private TaggingsDao taggingsDao;

    /**
     * Persists hot words and disagree_num updates for one project, then stores the pointer.
     *
     * @param hotwordsList candidate hot words; entries that already exist for the
     *        same osp_id and name are skipped, and at most 20 new rows are inserted
     * @param sourceTableName source table name identifying the pointer row
     * @param targetTableName target table name identifying the pointer row
     * @param pointer value written to the pointer row once all other work is done
     * @param updateDisagreeNumTagNameList names of tags whose disagree_num is increased
     * @param tagCountMap per-tag increment; must contain every name in
     *        {@code updateDisagreeNumTagNameList}
     * @param project project whose id is used as taggable_id
     * @throws IllegalStateException if a tag still cannot be found by name right
     *         after it was inserted
     */
    @Transactional(propagation=Propagation.REQUIRES_NEW)
    public void insert(List<Hotword> hotwordsList, String sourceTableName, String targetTableName, int pointer, List<String> updateDisagreeNumTagNameList, Map<String, Integer> tagCountMap, OpenSourceProject project){
        int inserted = 0; // number of hot words actually written
        for (int i = 0; i < hotwordsList.size() && inserted < MAX_HOTWORDS; i++) {
            Hotword hotword = hotwordsList.get(i);
            // Skip hot words already stored for this osp_id and name.
            if (!hotwordsDao.findItem(hotword).isEmpty()) {
                continue;
            }
            hotwordsDao.insertItem(hotword);
            inserted++;
        }

        final String taggableType = "OpenSourceProject";
        final int taggableId = project.getId();
        for (String tagName : updateDisagreeNumTagNameList) {
            List<Integer> ids = tagsDao.getIdByName(tagName);
            if (ids.isEmpty()) {
                // The tag is missing from the tags table: create it and look it up
                // exactly once more. The previous code re-ran the same iteration
                // without any bound, which never terminates (and keeps inserting
                // rows) when the stored name does not match the lookup.
                tagsDao.insertTag(tagName);
                ids = tagsDao.getIdByName(tagName);
                if (ids.isEmpty()) {
                    throw new IllegalStateException("tag: " + tagName + " cannot be found after insert");
                }
            }
            int tagId = ids.get(0);
            // NOTE(review): getDisagreeNum returns a primitive int; presumably the
            // mapper fails when no taggings row matches - confirm and guard if needed.
            int value = taggingsDao.getDisagreeNum(tagId, taggableId, taggableType);
            // Add the number of times the tag occurred in the matched posts.
            value = value + tagCountMap.get(tagName);
            taggingsDao.updateDisagreeNum(value, taggableId, taggableType, tagId);
        }

        pointersDao.updatePointer(sourceTableName, targetTableName, pointer);
    }
}

View File

@ -0,0 +1,33 @@
package com.ossean.projectmanager.hotwords;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Map.Entry;
public class MapSort {
public static Map<String, Float> sort(Map<String, Float> oldMap){
ArrayList<Map.Entry<String, Float>> list = new ArrayList<Map.Entry<String, Float>>(oldMap.entrySet());
Collections.sort(list, new Comparator<Map.Entry<String, Float>>() {
public int compare(Entry<String, Float> arg0,
Entry<String, Float> arg1) {
if(arg0.getValue() - arg1.getValue() > 0)
return -1;
else if(arg0.getValue() - arg1.getValue() < 0)
return 1;
else
return 0;
}
});
Map<String,Float> newMap = new LinkedHashMap<String,Float>();
for (int i = 0; i < list.size(); i++) {
newMap.put(list.get(i).getKey(), list.get(i).getValue());
}
return newMap;
}
}

View File

@ -0,0 +1,58 @@
package com.ossean.projectmanager.hotwords;
import java.util.ArrayList;
import java.util.List;
public class StringHandler {
//将tags字符串转换成List<String>
public static List<String> changeTagsToTagList(String tags){
List<String> result = new ArrayList<String>();
if(tags != null && !"".equals(tags)){
String[] strings = tags.split(",");
for(String str:strings){
int index1 = str.indexOf("<");
int index2 = str.indexOf(">");
if(index2 > index1 && index1 >= 0){
//读取当前的标签
String tag = str.substring(index1 + 1, index2);
if(!"".equals(tag) )
result.add(tag);
}
}
}
return result;
}
//将tagList转换成字符串
public static String changeTagListToTags(List<String> tagList){
String result = "";
for(String tag:tagList){
result += "<" + tag + ">,";
}
if(!"".equals(result)){
//表示需要转换的标签数量不为0
result = result.substring(0, result.length() - 1);
}else{
//表示没有需要转换的标签
result = null;
}
return result;
}
//判断标签在List中是否存在
public static boolean isTagExist(String tag, List<String> tags){
for(int i = 0; i < tags.size(); i++){
String tagName = tags.get(i);
if(tagName.equals(tag)){
//表示存在相同的标签
return true;
}
}
return false;//遍历完还没有返回true 就表示没有相同名称的标签
}
}

View File

@ -0,0 +1,81 @@
package com.ossean.projectmanager.hotwords;
import java.util.List;
import java.util.Map;
import javax.annotation.Resource;
import org.apache.log4j.Logger;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Qualifier;
import org.springframework.stereotype.Component;
import org.springframework.transaction.annotation.Propagation;
import org.springframework.transaction.annotation.Transactional;
import com.ossean.projectmanager.lasttabledao.OpenSourceProjectDao;
import com.ossean.projectmanager.lasttabledao.PointersDao;
import com.ossean.projectmanager.lasttabledao.TagDao;
import com.ossean.projectmanager.lasttabledao.TaggingsDao;
import com.ossean.projectmanager.model.Hotword;
import com.ossean.projectmanager.model.OpenSourceProject;
import com.ossean.projectmanager.model.Taggings;
/**
 * Stores a project's new tags string, creates taggings rows for the newly
 * added tags and then delegates to {@link InsertHotwords}.
 */
@Component("updatetagsandtaggings")
public class UpdateTagsAndTaggings {
    Logger logger = Logger.getLogger(this.getClass());

    @Resource
    private TaggingsDao taggingsDao;
    @Resource
    private OpenSourceProjectDao ospDao;
    @Resource
    private TagDao tagsDao;
    @Resource
    private PointersDao pointersDao;
    @Qualifier("inserthotwords")
    @Autowired
    private InsertHotwords insertClass;

    /**
     * Updates the project's tags, inserts missing taggings rows for the last
     * {@code count} entries of {@code projectTagList}, then calls
     * {@code InsertHotwords.insert} with pointer {@code project.getId() + 1}.
     *
     * @param project project being updated
     * @param projectTagList full tag list of the project; the newly added tags
     *        are expected to be its last {@code count} entries
     * @param tagsNew serialized tags string written to the project row
     * @param count number of newly added tags at the end of {@code projectTagList}
     * @param sourceTableName passed through to {@code InsertHotwords.insert}
     * @param targetTableName passed through to {@code InsertHotwords.insert}
     * @param hotwordsList passed through to {@code InsertHotwords.insert}
     * @param updateDisagreeNumTagNameList passed through to {@code InsertHotwords.insert}
     * @param tagCountMap passed through to {@code InsertHotwords.insert}
     * @throws IllegalStateException if a tag still cannot be found by name right
     *         after it was inserted
     */
    @Transactional(propagation=Propagation.REQUIRES_NEW)
    public void update(OpenSourceProject project, List<String> projectTagList, String tagsNew, int count, String sourceTableName, String targetTableName, List<Hotword> hotwordsList, List<String> updateDisagreeNumTagNameList, Map<String, Integer> tagCountMap){
        // Store the new tags string on the project.
        ospDao.updateTagsOfProject(project.getId(), tagsNew);

        // Walk the newly added tags, i.e. the last `count` entries, back to front.
        int firstNewIndex = projectTagList.size() - count;
        for (int i = projectTagList.size() - 1; i >= firstNewIndex; i--) {
            String tagName = projectTagList.get(i);
            List<Integer> tagIds = tagsDao.getIdByName(tagName);
            if (tagIds.isEmpty()) {
                // The tag is missing from the tags table: create it and look it up
                // exactly once more. The previous code re-ran the same iteration
                // without any bound, which never terminates (and keeps inserting
                // rows) when the stored name does not match the lookup.
                logger.fatal("tag: " + tagName + " doesn't exist! We will insert one");
                tagsDao.insertTag(tagName);
                tagIds = tagsDao.getIdByName(tagName);
                if (tagIds.isEmpty()) {
                    throw new IllegalStateException("tag: " + tagName + " cannot be found after insert");
                }
            }

            Taggings taggings = new Taggings();
            taggings.setTag_id(tagIds.get(0));
            taggings.setTaggable_id(project.getId());
            taggings.setDisagree_num(0); // new taggings start with disagree_num 0
            taggings.setTaggable_type("OpenSourceProject");
            taggings.setContext("tags");
            taggings.setTag_source("FromRelativeMemo");

            // Insert only when no taggings row exists yet for this tag and project.
            if (taggingsDao.findTaggings(taggings).isEmpty()) {
                taggingsDao.insertTaggings(taggings);
            }
        }

        // Hot words, disagree_num updates and the pointer update happen in insert().
        // NOTE(review): insert() is itself annotated REQUIRES_NEW; presumably it runs
        // in its own transaction when called through the injected bean - confirm
        // that this is intended.
        insertClass.insert(hotwordsList, sourceTableName, targetTableName, project.getId() + 1, updateDisagreeNumTagNameList, tagCountMap, project);
        logger.info("当前项目" + project.getName() + "的更新操作完成");
    }
}

View File

@ -0,0 +1,22 @@
package com.ossean.projectmanager.lasttabledao;
import java.util.List;
import org.apache.ibatis.annotations.Insert;
import org.apache.ibatis.annotations.Param;
import org.apache.ibatis.annotations.Select;
import com.ossean.projectmanager.model.Hotword;
/**
 * MyBatis mapper for the {@code hot_words} table.
 */
public interface HotwordDao {

    /**
     * Inserts one hot word; {@code created_at} and {@code updated_at} are set to
     * {@code now()} by the statement.
     *
     * @param item hot word supplying osp_id, name and weight
     */
    @Insert("insert into hot_words (`osp_id`,`name`,`weight`,`created_at`,`updated_at`) values (#{item.osp_id},#{item.name},#{item.weight},now(),now())")
    void insertItem(@Param("item") Hotword item);

    /**
     * Looks up the rows that have the same osp_id and name as {@code item}.
     *
     * @param item hot word supplying osp_id and name
     * @return the matching rows
     */
    @Select("select * from hot_words where osp_id=#{item.osp_id} and name=#{item.name}")
    List<Hotword> findItem(@Param("item") Hotword item);
}

View File

@ -0,0 +1,31 @@
package com.ossean.projectmanager.lasttabledao;
import java.util.List;
import org.apache.ibatis.annotations.Param;
import org.apache.ibatis.annotations.Select;
import org.apache.ibatis.annotations.Update;
import com.ossean.projectmanager.model.OpenSourceProject;
/**
 * MyBatis mapper for the {@code open_source_projects} table.
 */
public interface OpenSourceProjectDao {

    /**
     * Reads a batch of projects whose id is at least {@code start}.
     * <p>
     * The rows are ordered by id so the batch is always the {@code size} lowest
     * ids from {@code start} on. Without an ORDER BY the database may return any
     * {@code size} matching rows, which is unsafe for callers that advance an
     * id-based pointer while iterating the batch.
     *
     * @param start lowest project id to include
     * @param size maximum number of rows to return
     * @return the projects, in ascending id order
     */
    @Select("select * from open_source_projects where id>=#{start} order by id limit #{size}")
    public List<OpenSourceProject> getProjectsByBatch(
            @Param("start") int start, @Param("size") int size);

    /**
     * Replaces the tags string of one project.
     *
     * @param id project id
     * @param tags new value of the tags column
     */
    @Update("update open_source_projects set tags=#{tags} where id=#{id}")
    public void updateTagsOfProject(@Param("id") int id,
            @Param("tags") String tags);

    /**
     * Reads up to {@code batchSize} projects with only the id, source, url and
     * filtration columns selected.
     *
     * @param batchSize maximum number of rows to return
     * @return the projects
     */
    @Select("select id,source,url,filtration from open_source_projects limit #{batchSize}")
    public List<OpenSourceProject> getBatchPrjs(@Param("batchSize") int batchSize);

    /**
     * Sets the filtration flag of one project. Per the original comment:
     * 1 = kept, 2 = kept previously and already processed, 0 = not kept.
     *
     * @param prjId project id
     * @param filtration new flag value
     */
    @Update("update open_source_projects set filtration = #{filtration} where id = #{prjId}")
    public void updateFiltratedPrj(@Param("prjId") int prjId,
            @Param("filtration") int filtration);
}

View File

@ -0,0 +1,23 @@
package com.ossean.projectmanager.lasttabledao;
import org.apache.ibatis.annotations.Insert;
import org.apache.ibatis.annotations.Param;
import org.apache.ibatis.annotations.Select;
import org.apache.ibatis.annotations.Update;
/**
 * MyBatis mapper for the {@code pointers} table, whose rows are identified by a
 * source table name and a target table name.
 */
public interface PointersDao {

    /**
     * Reads the pointer stored for a source/target pair.
     *
     * @param source value matched against SourceTableName
     * @param target value matched against TargetTableName
     * @return the stored pointer
     */
    @Select("select Pointer from pointers where SourceTableName=#{source} and TargetTableName=#{target}")
    int readPointer(@Param("source") String source, @Param("target") String target);

    /**
     * Creates the pointer row for a source/target pair with an initial value of 1.
     *
     * @param source value stored in SourceTableName
     * @param target value stored in TargetTableName
     */
    @Insert("insert into pointers (SourceTableName, TargetTableName, Pointer) values (#{source}, #{target}, 1)")
    void insertPointer(@Param("source") String source, @Param("target") String target);

    /**
     * Overwrites the pointer stored for a source/target pair.
     *
     * @param source value matched against SourceTableName
     * @param target value matched against TargetTableName
     * @param pointer new pointer value
     */
    @Update("update pointers set Pointer=#{pointer} where SourceTableName=#{source} and TargetTableName=#{target}")
    void updatePointer(@Param("source") String source, @Param("target") String target, @Param("pointer") int pointer);
}

View File

@ -0,0 +1,14 @@
package com.ossean.projectmanager.lasttabledao;
import org.apache.ibatis.annotations.Param;
import org.apache.ibatis.annotations.Select;
import com.ossean.projectmanager.model.RelativeMemo;
/**
 * MyBatis mapper for the {@code relative_memos} table.
 */
public interface RelativeMemoDao {

    /**
     * Loads one post by its id.
     *
     * @param id id of the post
     * @return the row with that id
     */
    @Select("select * from relative_memos where id=#{id}")
    RelativeMemo getById(@Param("id") int id);
}

View File

@ -0,0 +1,29 @@
package com.ossean.projectmanager.lasttabledao;
import java.util.List;
import org.apache.ibatis.annotations.Delete;
import org.apache.ibatis.annotations.Param;
import org.apache.ibatis.annotations.Select;
import com.ossean.projectmanager.model.RelativeMemoToOpenSourceProject;
/**
 * MyBatis mapper for the match-result tables that link posts to projects.
 * <p>
 * Table names are spliced into the SQL text with {@code ${...}}, so they must
 * come from trusted code and never from external input. All values are bound
 * with {@code #{...}}.
 */
public interface RelativeMemoToOpenSourceProjectDao {

    /**
     * Reads the match results of a project whose match_weight is greater than 2.
     *
     * @param osp_id project id
     * @param targetTableName name of the match-result table to query
     * @return the matching rows
     */
    @Select("select * from ${targetTableName} where osp_id = #{osp_id} and match_weight > 2")
    public List<RelativeMemoToOpenSourceProject> getHighWeightMatchResult(
            @Param("osp_id") int osp_id,
            @Param("targetTableName") String targetTableName);

    /**
     * Reads all match results of a project from
     * {@code relative_memo_to_open_source_projects_new}.
     *
     * @param osp_id project id
     * @return the matching rows
     */
    @Select("select * from relative_memo_to_open_source_projects_new where osp_id=#{osp_id}")
    public List<RelativeMemoToOpenSourceProject> getRelativeMemosByOspId(
            @Param("osp_id") int osp_id);

    /**
     * Deletes all match results of a project from the given table. The project
     * id is bound as a statement parameter, like the other queries in this
     * mapper, instead of being pasted into the SQL text.
     *
     * @param targetTableName name of the match-result table to delete from
     * @param ospId project id
     */
    @Delete("delete from ${targetTableName} where osp_id = #{ospId}")
    public void deleteMatchResult(
            @Param("targetTableName") String targetTableName,
            @Param("ospId") int ospId);
}

View File

@ -0,0 +1,20 @@
package com.ossean.projectmanager.lasttabledao;
import java.util.List;
import org.apache.ibatis.annotations.Insert;
import org.apache.ibatis.annotations.Param;
import org.apache.ibatis.annotations.Select;
/**
 * MyBatis mapper for the {@code tags} table.
 */
public interface TagDao {

    /**
     * Looks up the ids of the tags with the given name.
     *
     * @param name tag name
     * @return ids of all rows with that name
     */
    @Select("select id from tags where name=#{name}")
    List<Integer> getIdByName(@Param("name") String name);

    /**
     * Inserts a tag row with the given name.
     *
     * @param name tag name
     */
    @Insert("insert into tags (name) values (#{name})")
    void insertTag(@Param("name") String name);
}

View File

@ -0,0 +1,36 @@
package com.ossean.projectmanager.lasttabledao;
import java.util.List;
import org.apache.ibatis.annotations.Insert;
import org.apache.ibatis.annotations.Param;
import org.apache.ibatis.annotations.Select;
import org.apache.ibatis.annotations.Update;
import com.ossean.projectmanager.model.Taggings;
/**
 * MyBatis mapper for the {@code taggings} table.
 */
public interface TaggingsDao {

    /**
     * Reads the taggings rows attached to a post (taggable_type 'RelativeMemo').
     *
     * @param memoId id of the post
     * @return the matching rows
     */
    @Select("select * from taggings where taggable_id=#{memoId} and taggable_type='RelativeMemo'")
    List<Taggings> getByMemoId(@Param("memoId") int memoId);

    /**
     * Inserts a taggings row; {@code created_at} is set to {@code now()} by the
     * statement.
     *
     * @param item row supplying tag_id, taggable_id, taggable_type, disagree_num,
     *        context and tag_source
     */
    @Insert("insert into taggings (tag_id,taggable_id,taggable_type,disagree_num,context,created_at,tag_source) values (#{item.tag_id},#{item.taggable_id},#{item.taggable_type},#{item.disagree_num},#{item.context},now(),#{item.tag_source})")
    void insertTaggings(@Param("item") Taggings item);

    /**
     * Looks up the rows with the same tag_id, taggable_type and taggable_id as
     * {@code item}.
     *
     * @param item row supplying the three lookup values
     * @return the matching rows
     */
    @Select("select * from taggings where tag_id=#{item.tag_id} and taggable_type=#{item.taggable_type} and taggable_id=#{item.taggable_id}")
    List<Taggings> findTaggings(@Param("item") Taggings item);

    /**
     * Overwrites disagree_num on the rows identified by the three key values.
     *
     * @param value new disagree_num
     * @param taggable_id id of the tagged object
     * @param taggable_type type of the tagged object
     * @param tag_id id of the tag
     */
    @Update("update taggings set disagree_num=#{value} where taggable_id=#{taggable_id} AND taggable_type=#{taggable_type} AND tag_id=#{tag_id} ")
    void updateDisagreeNum(@Param("value") int value,
            @Param("taggable_id") int taggable_id,
            @Param("taggable_type") String taggable_type,
            @Param("tag_id") int tag_id);

    /**
     * Reads disagree_num of the row identified by the three key values.
     *
     * @param tag_id id of the tag
     * @param taggable_id id of the tagged object
     * @param taggable_type type of the tagged object
     * @return the stored disagree_num
     */
    @Select("select disagree_num from taggings where tag_id=#{tag_id} and taggable_id=#{taggable_id} and taggable_type=#{taggable_type}")
    int getDisagreeNum(@Param("tag_id") int tag_id,
            @Param("taggable_id") int taggable_id,
            @Param("taggable_type") String taggable_type);
}

View File

@ -0,0 +1,48 @@
package com.ossean.projectmanager.model;
/**
 * Plain data holder for one hot word of a project. Property names follow the
 * column names used by the {@code hot_words} statements.
 */
public class Hotword {
    private int id;
    /** Id of the open source project the hot word belongs to. */
    private int osp_id;
    /** The hot word itself. */
    private String name;
    private float weight;
    private String created_at;
    private String updated_at;

    public int getId() { return id; }
    public void setId(int id) { this.id = id; }

    public int getOsp_id() { return osp_id; }
    public void setOsp_id(int osp_id) { this.osp_id = osp_id; }

    public String getName() { return name; }
    public void setName(String name) { this.name = name; }

    public float getWeight() { return weight; }
    public void setWeight(float weight) { this.weight = weight; }

    public String getCreated_at() { return created_at; }
    public void setCreated_at(String created_at) { this.created_at = created_at; }

    public String getUpdated_at() { return updated_at; }
    public void setUpdated_at(String updated_at) { this.updated_at = updated_at; }
}

View File

@ -0,0 +1,140 @@
package com.ossean.projectmanager.model;
/**
 * Plain data holder for one row of {@code open_source_projects}. Property
 * names follow the column names.
 */
public class OpenSourceProject {
    private int id;
    private String url;
    private String name;
    private String source;
    /**
     * Filtration flag. The field name is misspelled; it is kept for
     * compatibility and is exposed under both the old and the correct name.
     */
    private int filration;
    private int followers_num;
    private String language;
    private int download_num;
    private int view_num_crawled;
    private String category;
    private String crawled_time;
    private int view_num_local;
    private String created_at;
    private String updated_at;
    private int ossean_score;
    private int relative_memos_num;
    private String created_time;
    private String updated_time;
    /** Serialized tags string of the project. */
    private String tags;

    public String getUrl() { return url; }
    public void setUrl(String url) { this.url = url; }

    public int getId() { return id; }
    public void setId(int id) { this.id = id; }

    public String getName() { return name; }
    public void setName(String name) { this.name = name; }

    public String getSource() { return source; }
    public void setSource(String source) { this.source = source; }

    /**
     * Legacy, misspelled accessor for the filtration flag; kept so existing
     * callers keep compiling.
     *
     * @return the filtration flag
     */
    public int getFilration() { return filration; }

    /**
     * Legacy, misspelled mutator for the filtration flag.
     *
     * @param filration new flag value
     */
    public void setFilration(int filration) { this.filration = filration; }

    /**
     * Correctly spelled accessor for the filtration flag; reads the same field
     * as {@link #getFilration()}.
     *
     * @return the filtration flag
     */
    public int getFiltration() { return filration; }

    /**
     * Correctly spelled mutator for the filtration flag. The project's SQL uses
     * a column named {@code filtration}; a property of that exact name is needed
     * for name-based result mapping to populate the flag, which otherwise stays
     * at its default of 0.
     *
     * @param filtration new flag value
     */
    public void setFiltration(int filtration) { this.filration = filtration; }

    public int getFollowers_num() { return followers_num; }
    public void setFollowers_num(int followers_num) { this.followers_num = followers_num; }

    public String getLanguage() { return language; }
    public void setLanguage(String language) { this.language = language; }

    public int getDownload_num() { return download_num; }
    public void setDownload_num(int download_num) { this.download_num = download_num; }

    public int getView_num_crawled() { return view_num_crawled; }
    public void setView_num_crawled(int view_num_crawled) { this.view_num_crawled = view_num_crawled; }

    public String getCategory() { return category; }
    public void setCategory(String category) { this.category = category; }

    public String getCrawled_time() { return crawled_time; }
    public void setCrawled_time(String crawled_time) { this.crawled_time = crawled_time; }

    public int getView_num_local() { return view_num_local; }
    public void setView_num_local(int view_num_local) { this.view_num_local = view_num_local; }

    public String getCreated_at() { return created_at; }
    public void setCreated_at(String created_at) { this.created_at = created_at; }

    public String getUpdated_at() { return updated_at; }
    public void setUpdated_at(String updated_at) { this.updated_at = updated_at; }

    public int getOssean_score() { return ossean_score; }
    public void setOssean_score(int ossean_score) { this.ossean_score = ossean_score; }

    public int getRelative_memos_num() { return relative_memos_num; }
    public void setRelative_memos_num(int relative_memos_num) { this.relative_memos_num = relative_memos_num; }

    public String getCreated_time() { return created_time; }
    public void setCreated_time(String created_time) { this.created_time = created_time; }

    public String getUpdated_time() { return updated_time; }
    public void setUpdated_time(String updated_time) { this.updated_time = updated_time; }

    public String getTags() { return tags; }
    public void setTags(String tags) { this.tags = tags; }
}

View File

@ -0,0 +1,27 @@
package com.ossean.projectmanager.model;
/**
 * Plain data holder for the columns read from {@code openhub_project}.
 */
public class OpenhubProject {
    private String description;
    private String name;
    private String codeLocation;

    public String getDescription() { return description; }
    public void setDescription(String description) { this.description = description; }

    public String getName() { return name; }
    public void setName(String name) { this.name = name; }

    public String getCodeLocation() { return codeLocation; }
    public void setCodeLocation(String codeLocation) { this.codeLocation = codeLocation; }
}

View File

@ -0,0 +1,209 @@
package com.ossean.projectmanager.model;
/**
 * Plain data holder for one row of {@code relative_memos} (a post). Property
 * names follow the column names.
 */
public class RelativeMemo {
    private int id;
    private int osp_id;
    private String subject;
    private String content;
    private String author;
    private int replies_num;
    private int lock;
    private int sticky;
    private String created_time;
    private String updated_time;
    private String url;
    private int view_num_crawled;
    private int vote_up_num;
    private int collection_num;
    private String abstractText;
    private String memo_type;
    private String source;
    private String category;
    private int view_num_trustie;
    private int author_id;
    private int parent_id;
    private int last_reply_id;
    private int is_quote;
    private String username;
    private String userhomeurl;
    private String crawled_time;
    private String author_url;
    private String url_md5;
    /** Serialized tags string of the post. */
    private String tags;

    public int getId() { return id; }
    public void setId(int id) { this.id = id; }

    public int getOsp_id() { return osp_id; }
    public void setOsp_id(int osp_id) { this.osp_id = osp_id; }

    public String getSubject() { return subject; }
    public void setSubject(String subject) { this.subject = subject; }

    public String getContent() { return content; }
    public void setContent(String content) { this.content = content; }

    public String getAuthor() { return author; }
    public void setAuthor(String author) { this.author = author; }

    public int getReplies_num() { return replies_num; }
    public void setReplies_num(int replies_num) { this.replies_num = replies_num; }

    public int getLock() { return lock; }
    public void setLock(int lock) { this.lock = lock; }

    public int getSticky() { return sticky; }
    public void setSticky(int sticky) { this.sticky = sticky; }

    public String getCreated_time() { return created_time; }
    public void setCreated_time(String created_time) { this.created_time = created_time; }

    public String getUpdated_time() { return updated_time; }
    public void setUpdated_time(String updated_time) { this.updated_time = updated_time; }

    public String getUrl() { return url; }
    public void setUrl(String url) { this.url = url; }

    public int getView_num_crawled() { return view_num_crawled; }
    public void setView_num_crawled(int view_num_crawled) { this.view_num_crawled = view_num_crawled; }

    public int getVote_up_num() { return vote_up_num; }
    public void setVote_up_num(int vote_up_num) { this.vote_up_num = vote_up_num; }

    public int getCollection_num() { return collection_num; }
    public void setCollection_num(int collection_num) { this.collection_num = collection_num; }

    public String getAbstractText() { return abstractText; }
    public void setAbstractText(String abstractText) { this.abstractText = abstractText; }

    public String getMemo_type() { return memo_type; }
    public void setMemo_type(String memo_type) { this.memo_type = memo_type; }

    public String getSource() { return source; }
    public void setSource(String source) { this.source = source; }

    public String getCategory() { return category; }
    public void setCategory(String category) { this.category = category; }

    public int getView_num_trustie() { return view_num_trustie; }
    public void setView_num_trustie(int view_num_trustie) { this.view_num_trustie = view_num_trustie; }

    public int getAuthor_id() { return author_id; }
    public void setAuthor_id(int author_id) { this.author_id = author_id; }

    public int getParent_id() { return parent_id; }
    public void setParent_id(int parent_id) { this.parent_id = parent_id; }

    public int getLast_reply_id() { return last_reply_id; }
    public void setLast_reply_id(int last_reply_id) { this.last_reply_id = last_reply_id; }

    public int getIs_quote() { return is_quote; }
    public void setIs_quote(int is_quote) { this.is_quote = is_quote; }

    public String getUsername() { return username; }
    public void setUsername(String username) { this.username = username; }

    public String getUserhomeurl() { return userhomeurl; }
    public void setUserhomeurl(String userhomeurl) { this.userhomeurl = userhomeurl; }

    public String getCrawled_time() { return crawled_time; }
    public void setCrawled_time(String crawled_time) { this.crawled_time = crawled_time; }

    public String getAuthor_url() { return author_url; }
    public void setAuthor_url(String author_url) { this.author_url = author_url; }

    public String getUrl_md5() { return url_md5; }
    public void setUrl_md5(String url_md5) { this.url_md5 = url_md5; }

    public String getTags() { return tags; }
    public void setTags(String tags) { this.tags = tags; }
}

View File

@ -0,0 +1,73 @@
package com.ossean.projectmanager.model;
import java.util.List;
public class RelativeMemoToOpenSourceProject {
private int id;
private int osp_id;
private int relative_memo_id;
private float match_weight;
private String created_time;
private List<Integer> tagIds;
private int replies_num;
private int view_num_crawled;
private String memo_type;
public int getId() {
return id;
}
public void setId(int id) {
this.id = id;
}
public int getOsp_id() {
return osp_id;
}
public void setOsp_id(int osp_id) {
this.osp_id = osp_id;
}
public int getRelative_memo_id() {
return relative_memo_id;
}
public void setRelative_memo_id(int relative_memo_id) {
this.relative_memo_id = relative_memo_id;
}
public float getMatch_weight() {
return match_weight;
}
public void setMatch_weight(float match_weight) {
this.match_weight = match_weight;
}
public String getCreated_time() {
return created_time;
}
public void setCreated_time(String created_time) {
this.created_time = created_time;
}
public List<Integer> getTagIds() {
return tagIds;
}
public void setTagIds(List<Integer> tagIds) {
this.tagIds = tagIds;
}
public int getReplies_num() {
return replies_num;
}
public void setReplies_num(int replies_num) {
this.replies_num = replies_num;
}
public int getView_num_crawled() {
return view_num_crawled;
}
public void setView_num_crawled(int view_num_crawled) {
this.view_num_crawled = view_num_crawled;
}
public String getMemo_type() {
return memo_type;
}
public void setMemo_type(String memo_type) {
this.memo_type = memo_type;
}
}

View File

@ -0,0 +1,34 @@
package com.ossean.projectmanager.model;
/**
 * Plain data holder for the columns read from {@code sourceforge_project}.
 */
public class SourceForgeProject {
    private String description;
    private String name;
    private int download_num;
    private int stars;

    public String getDescription() { return description; }
    public void setDescription(String description) { this.description = description; }

    public String getName() { return name; }
    public void setName(String name) { this.name = name; }

    public int getDownload_num() { return download_num; }
    public void setDownload_num(int download_num) { this.download_num = download_num; }

    public int getStars() { return stars; }
    public void setStars(int stars) { this.stars = stars; }
}

View File

@ -0,0 +1,75 @@
package com.ossean.projectmanager.model;
/**
 * Plain data holder for one row of {@code taggings}, which attaches a tag to a
 * tagged object. Property names follow the column names.
 */
public class Taggings {
    private int id;
    private int tag_id;
    /** Id of the tagged object. */
    private int taggable_id;
    /** Type of the tagged object, e.g. "OpenSourceProject" or "RelativeMemo". */
    private String taggable_type;
    private int tagger_id;
    private String tagger_type;
    private String context;
    private String created_at;
    private int disagree_num;
    private String tag_source;

    public int getId() { return id; }
    public void setId(int id) { this.id = id; }

    public int getTag_id() { return tag_id; }
    public void setTag_id(int tag_id) { this.tag_id = tag_id; }

    public int getTaggable_id() { return taggable_id; }
    public void setTaggable_id(int taggable_id) { this.taggable_id = taggable_id; }

    public String getTaggable_type() { return taggable_type; }
    public void setTaggable_type(String taggable_type) { this.taggable_type = taggable_type; }

    public int getTagger_id() { return tagger_id; }
    public void setTagger_id(int tagger_id) { this.tagger_id = tagger_id; }

    public String getTagger_type() { return tagger_type; }
    public void setTagger_type(String tagger_type) { this.tagger_type = tagger_type; }

    public String getContext() { return context; }
    public void setContext(String context) { this.context = context; }

    public String getCreated_at() { return created_at; }
    public void setCreated_at(String created_at) { this.created_at = created_at; }

    public int getDisagree_num() { return disagree_num; }
    public void setDisagree_num(int disagree_num) { this.disagree_num = disagree_num; }

    public String getTag_source() { return tag_source; }
    public void setTag_source(String tag_source) { this.tag_source = tag_source; }
}

View File

@ -0,0 +1,18 @@
package com.ossean.projectmanager.parttabledao;
import org.apache.ibatis.annotations.Param;
import org.apache.ibatis.annotations.Select;
import com.ossean.projectmanager.model.OpenhubProject;
import com.ossean.projectmanager.model.SourceForgeProject;
/**
 * MyBatis mapper for the per-community project tables, looked up by url.
 */
public interface PartProjectDao {

    /**
     * Reads name, description and codeLocation of the {@code openhub_project}
     * row with the given url.
     *
     * @param url project url
     * @return the matching row
     */
    @Select("select name,description,codeLocation from openhub_project where url = #{url}")
    OpenhubProject getOpenHubPrjByUrl(@Param("url") String url);

    /**
     * Reads name, description, download_num and stars of the
     * {@code sourceforge_project} row with the given url.
     *
     * @param url project url
     * @return the matching row
     */
    @Select("select name,description,download_num,stars from sourceforge_project where url = #{url}")
    SourceForgeProject getSourceForgePrjByUrl(@Param("url") String url);
}

View File

@ -0,0 +1,135 @@
package com.ossean.projectmanager.projectsfilter;
import java.util.List;
import javax.annotation.Resource;
import org.apache.commons.lang3.StringUtils;
import org.springframework.stereotype.Component;
import com.ossean.projectmanager.lasttabledao.OpenSourceProjectDao;
import com.ossean.projectmanager.lasttabledao.RelativeMemoToOpenSourceProjectDao;
import com.ossean.projectmanager.model.OpenhubProject;
import com.ossean.projectmanager.model.OpenSourceProject;
import com.ossean.projectmanager.model.SourceForgeProject;
import com.ossean.projectmanager.parttabledao.PartProjectDao;
/**
 * Re-evaluates the filter flag of the projects in the merged open-source
 * project table, using rules specific to the community each project came from.
 *
 * <p>A project's {@code url} field holds one or more entries separated by
 * {@code |,|}; each entry has the form {@code <community>|:|<url>}. Only the
 * first entry (the source kept as the hottest one during de-duplication) is
 * used to decide which rule applies.
 *
 * <p>Filter flag values written through {@code updateFiltratedPrj}:
 * <ul>
 *   <li>0 - the project is not retained;</li>
 *   <li>1 - the project was previously 0 and has just been retained;</li>
 *   <li>2 - the project was already retained (1 or 2) and stays retained.</li>
 * </ul>
 */
@Component("projectsFilter")
public class ProjectsFilter {

    /** Separator between the source entries of a multi-source url field. */
    private static final String ENTRY_SEPARATOR = "|,|";
    /** Separator between the community name and the url inside one entry. */
    private static final String COMMUNITY_URL_SEPARATOR = "|:|";

    private static final String COMMUNITY_OPENHUB = "OpenHub";
    private static final String COMMUNITY_SOURCEFORGE = "SourceForge";
    /** Text found in an OpenHub code location when the project has no repository. */
    private static final String OPENHUB_NO_REPOSITORY_MARKER = "add a code location";

    private static final int FLAG_NOT_RETAINED = 0;
    private static final int FLAG_NEWLY_RETAINED = 1;
    private static final int FLAG_STILL_RETAINED = 2;

    /** Common prefix of the sharded match-result tables. */
    private static final String MATCH_TABLE_PREFIX = "relative_memo_to_open_source_projects_";

    // Argument handed to getBatchPrjs; presumably an upper bound on the rows
    // fetched - TODO confirm against OpenSourceProjectDao.
    private static final int PROJECT_BATCH_ARGUMENT = 10000000;

    @Resource
    private OpenSourceProjectDao lastProjectDao;
    @Resource
    private PartProjectDao partProjectDao;
    @Resource
    private RelativeMemoToOpenSourceProjectDao matchResultDao;

    /**
     * Filters the merged project table according to community-specific fields.
     *
     * <p>OpenHub projects are retained when name and description are non-empty
     * and a code location is recorded; SourceForge projects are retained when
     * name and description are non-empty and either downloads or stars are
     * positive; projects from any other community are always retained.
     *
     * <p>Projects whose url field cannot be parsed, or whose row is missing
     * from the community table, are left untouched instead of aborting the
     * whole run.
     */
    public void filtratePrjs() {
        List<OpenSourceProject> prjsList = lastProjectDao.getBatchPrjs(PROJECT_BATCH_ARGUMENT);
        for (OpenSourceProject project : prjsList) {
            String[] communityAndUrl = parseFirstSource(project.getUrl());
            if (communityAndUrl == null) {
                // Null or malformed url field: nothing can be decided for this project.
                continue;
            }
            String source = communityAndUrl[0];
            String url = communityAndUrl[1];

            if (COMMUNITY_OPENHUB.equals(source)) {
                // Fetch the project details from the OpenHub community table.
                OpenhubProject openhubProject = partProjectDao.getOpenHubPrjByUrl(url);
                if (openhubProject == null) {
                    continue; // no community row to evaluate
                }
                applyDecision(project, passesOpenHubFilter(openhubProject));
            } else if (COMMUNITY_SOURCEFORGE.equals(source)) {
                // Fetch the project details from the SourceForge community table.
                SourceForgeProject sourceforgeProject = partProjectDao.getSourceForgePrjByUrl(url);
                if (sourceforgeProject == null) {
                    continue; // no community row to evaluate
                }
                applyDecision(project, passesSourceForgeFilter(sourceforgeProject));
            } else {
                // No rule exists for other communities: always retain.
                markRetained(project);
            }
        }
    }

    /**
     * Extracts the community name and url of the first source entry.
     *
     * @param prjUrl raw url field, e.g. {@code A|:|urlA|,|B|:|urlB} or {@code A|:|urlA}
     * @return a two-element array {community, url}, or {@code null} when the
     *         field is null, empty, or its first entry lacks the {@code |:|} separator
     */
    private static String[] parseFirstSource(String prjUrl) {
        // Works for both single- and multi-source fields: a field without
        // "|,|" yields itself as the only entry.
        String[] entries = StringUtils.splitByWholeSeparator(prjUrl, ENTRY_SEPARATOR);
        if (entries == null || entries.length == 0) {
            return null;
        }
        String[] parts = StringUtils.splitByWholeSeparator(entries[0], COMMUNITY_URL_SEPARATOR);
        if (parts == null || parts.length < 2) {
            return null;
        }
        return new String[] { parts[0], parts[1] };
    }

    /**
     * OpenHub rule: non-empty name and description, and a recorded repository.
     * NOTE(review): a null code location is treated as "no repository" - confirm.
     */
    private static boolean passesOpenHubFilter(OpenhubProject candidate) {
        String codeLocation = candidate.getCodeLocation();
        return StringUtils.isNotEmpty(candidate.getName())
                && StringUtils.isNotEmpty(candidate.getDescription())
                && codeLocation != null
                && !codeLocation.contains(OPENHUB_NO_REPOSITORY_MARKER);
    }

    /** SourceForge rule: non-empty name and description, and downloads or stars above zero. */
    private static boolean passesSourceForgeFilter(SourceForgeProject candidate) {
        return StringUtils.isNotEmpty(candidate.getName())
                && StringUtils.isNotEmpty(candidate.getDescription())
                && (candidate.getDownload_num() > 0 || candidate.getStars() > 0);
    }

    /** Persists the outcome of a community rule for one project. */
    private void applyDecision(OpenSourceProject project, boolean retained) {
        if (retained) {
            markRetained(project);
        } else {
            markNotRetained(project);
        }
    }

    /**
     * Marks a project as retained: 0 becomes 1 (and stale match results are
     * removed so none survive from before), anything else becomes 2.
     */
    private void markRetained(OpenSourceProject project) {
        int projectId = project.getId();
        if (project.getFilration() == FLAG_NOT_RETAINED) {
            lastProjectDao.updateFiltratedPrj(projectId, FLAG_NEWLY_RETAINED);
            matchResultDao.deleteMatchResult(getTargetTable(projectId), projectId);
        } else {
            lastProjectDao.updateFiltratedPrj(projectId, FLAG_STILL_RETAINED);
        }
    }

    /** Marks a project as not retained and deletes its match results. */
    private void markNotRetained(OpenSourceProject project) {
        int projectId = project.getId();
        lastProjectDao.updateFiltratedPrj(projectId, FLAG_NOT_RETAINED);
        matchResultDao.deleteMatchResult(getTargetTable(projectId), projectId);
    }

    /**
     * Returns the name of the match-result table that holds rows for a project.
     *
     * <p>Ids below 10000 map to tables 1-8 using uneven ranges; ids in
     * [10000, 310000) map to table {@code 7 + osp_id / 5000} (9 through 68);
     * ids of 310000 and above map to table 70.
     * NOTE(review): table 69 is never returned by this mapping - confirm that
     * this matches how the tables were populated.
     *
     * @param osp_id id of the open-source project
     * @return the match-result table name for that id
     */
    public static String getTargetTable(int osp_id) {
        final int tableIndex;
        if (osp_id < 500) {
            tableIndex = 1;
        } else if (osp_id < 1000) {
            tableIndex = 2;
        } else if (osp_id < 1500) {
            tableIndex = 3;
        } else if (osp_id < 2000) {
            tableIndex = 4;
        } else if (osp_id < 3000) {
            tableIndex = 5;
        } else if (osp_id < 5000) {
            tableIndex = 6;
        } else if (osp_id < 7500) {
            tableIndex = 7;
        } else if (osp_id < 10000) {
            tableIndex = 8;
        } else if (osp_id < 310000) {
            tableIndex = 7 + osp_id / 5000;
        } else {
            tableIndex = 70;
        }
        return MATCH_TABLE_PREFIX + tableIndex;
    }
}

View File

@ -0,0 +1,61 @@
<?xml version="1.0" encoding="UTF-8"?>
<beans xmlns="http://www.springframework.org/schema/beans"
       xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
       xmlns:context="http://www.springframework.org/schema/context"
       xmlns:aop="http://www.springframework.org/schema/aop"
       xmlns:p="http://www.springframework.org/schema/p"
       xmlns:tx="http://www.springframework.org/schema/tx"
       xsi:schemaLocation="http://www.springframework.org/schema/beans
           http://www.springframework.org/schema/beans/spring-beans-3.0.xsd
           http://www.springframework.org/schema/context
           http://www.springframework.org/schema/context/spring-context-3.0.xsd
           http://www.springframework.org/schema/tx
           http://www.springframework.org/schema/tx/spring-tx-3.0.xsd
           http://www.springframework.org/schema/aop
           http://www.springframework.org/schema/aop/spring-aop-3.0.xsd">

    <!--
        Source data: connection pool, MyBatis session factory and mapper scan
        for the com.ossean.projectmanager.parttabledao package.
        NOTE(review): the database user and password are hard-coded here;
        consider moving them out of version control.
    -->
    <bean id="dataSourceOne" class="org.apache.commons.dbcp.BasicDataSource"
          destroy-method="close">
        <property name="driverClassName" value="com.mysql.jdbc.Driver" />
        <property name="url"
                  value="jdbc:mysql://localhost:3306/ossean_production?characterEncoding=UTF-8&amp;zeroDateTimeBehavior=convertToNull&amp;autoReconnect=true" />
        <property name="username" value="root" />
        <property name="password" value="1234" />
        <!-- Each connection is validated with this query when borrowed from the pool. -->
        <property name="validationQuery" value="SELECT 1" />
        <property name="testOnBorrow" value="true" />
    </bean>

    <bean id="sqlSessionFactoryOne" class="org.mybatis.spring.SqlSessionFactoryBean">
        <property name="dataSource" ref="dataSourceOne" />
    </bean>

    <!--
        With more than one SqlSessionFactory in the context the scanner must be
        told which one to use. The factory is referenced by bean NAME (value),
        not by bean reference, because the scanner runs early during context
        start-up.
    -->
    <bean class="org.mybatis.spring.mapper.MapperScannerConfigurer">
        <property name="basePackage" value="com.ossean.projectmanager.parttabledao" />
        <property name="sqlSessionFactoryBeanName" value="sqlSessionFactoryOne" />
    </bean>

    <!--
        Destination data: connection pool, MyBatis session factory and mapper
        scan for the com.ossean.projectmanager.lasttabledao package.
    -->
    <bean id="dataSourceTwo" class="org.apache.commons.dbcp.BasicDataSource"
          destroy-method="close">
        <property name="driverClassName" value="com.mysql.jdbc.Driver" />
        <property name="url"
                  value="jdbc:mysql://localhost:3306/ossean_production?characterEncoding=UTF-8&amp;zeroDateTimeBehavior=convertToNull&amp;autoReconnect=true" />
        <property name="username" value="root" />
        <property name="password" value="1234" />
        <property name="validationQuery" value="SELECT 1" />
        <property name="testOnBorrow" value="true" />
    </bean>

    <bean id="sqlSessionFactoryTwo" class="org.mybatis.spring.SqlSessionFactoryBean">
        <property name="dataSource" ref="dataSourceTwo" />
    </bean>

    <bean class="org.mybatis.spring.mapper.MapperScannerConfigurer">
        <property name="basePackage" value="com.ossean.projectmanager.lasttabledao" />
        <property name="sqlSessionFactoryBeanName" value="sqlSessionFactoryTwo" />
    </bean>

    <!--
        Annotation-driven transactions.
        NOTE(review): the transaction manager is bound to dataSourceOne only,
        so work done through the dataSourceTwo mappers is not covered by it.
        Confirm this is intended.
    -->
    <tx:annotation-driven transaction-manager="transactionManager" />
    <bean id="transactionManager" class="org.springframework.jdbc.datasource.DataSourceTransactionManager">
        <property name="dataSource" ref="dataSourceOne" />
    </bean>
</beans>

View File

@ -0,0 +1,16 @@
<?xml version="1.0" encoding="UTF-8"?>
<beans xmlns="http://www.springframework.org/schema/beans"
       xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
       xmlns:context="http://www.springframework.org/schema/context"
       xmlns:mvc="http://www.springframework.org/schema/mvc"
       xsi:schemaLocation="http://www.springframework.org/schema/mvc
           http://www.springframework.org/schema/mvc/spring-mvc-3.0.xsd
           http://www.springframework.org/schema/beans
           http://www.springframework.org/schema/beans/spring-beans-3.0.xsd
           http://www.springframework.org/schema/context
           http://www.springframework.org/schema/context/spring-context-3.0.xsd">

    <!-- Enable processing of injection annotations on beans registered in this context. -->
    <context:annotation-config />

    <!-- Detect and register annotated components found under the application's base package. -->
    <context:component-scan base-package="com.ossean.projectmanager" />
</beans>

View File

@ -0,0 +1,77 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE log4j:configuration SYSTEM "log4j.dtd">
<log4j:configuration xmlns:log4j="http://jakarta.apache.org/log4j/">

    <!-- Console output, limited to ERROR and above. -->
    <appender name="stdout" class="org.apache.log4j.ConsoleAppender">
        <!-- The threshold must be set on the appender itself; PatternLayout has no such property. -->
        <param name="threshold" value="ERROR" />
        <layout class="org.apache.log4j.PatternLayout">
            <param name="ConversionPattern" value="%d{yy-MM-dd HH:mm:ss,SSS} %-5p %c(%F:%L) ## %m%n" />
        </layout>
    </appender>

    <!-- Daily-rolling error log, limited to ERROR and above. -->
    <appender name="file" class="org.apache.log4j.DailyRollingFileAppender">
        <param name="File" value="./log/error.log" />
        <param name="threshold" value="ERROR" />
        <layout class="org.apache.log4j.PatternLayout">
            <param name="ConversionPattern" value="%d{yy-MM-dd HH:mm:ss,SSS} %-5p %c(%F:%L) ## %m%n" />
        </layout>
    </appender>

    <!-- Daily-rolling general log; no threshold, so it receives every event passed on by the logger. -->
    <appender name="file_log" class="org.apache.log4j.DailyRollingFileAppender">
        <param name="File" value="./log/webmagic.log" />
        <layout class="org.apache.log4j.PatternLayout">
            <param name="ConversionPattern" value="%d{yy-MM-dd HH:mm:ss,SSS} %-5p %c(%F:%L) ## %m%n" />
        </layout>
    </appender>

    <!--
        E-mail appender. A mail is only sent when an ERROR event occurs.
        NOTE(review): the SMTP account password is stored in clear text in this
        file; consider rotating it and supplying it from outside version control.
    -->
    <appender name="MAIL" class="org.apache.log4j.net.SMTPAppender">
        <param name="threshold" value="debug" />
        <!-- Alternative error level: <param name="threshold" value="fatal"/> -->
        <!-- BufferSize is the number of logging events kept in the buffer (an event count, not kilobytes). -->
        <param name="BufferSize" value="1" />
        <param name="From" value="ossean_debug@163.com" />
        <param name="SMTPHost" value="smtp.163.com" />
        <param name="Subject" value="ossean-crawler-debug-log4jMessage" />
        <param name="To" value="gcm3651@126.com" />
        <param name="SMTPUsername" value="ossean_debug" />
        <param name="SMTPPassword" value="goodwell123" />
        <layout class="org.apache.log4j.PatternLayout">
            <param name="ConversionPattern" value="%-d{yyyy-MM-dd HH:mm:ss.SSS} [%p]-[%c] %m%n" />
        </layout>
    </appender>

    <!--
        Database appender: writes log events into the log4j table.
        NOTE(review): the message (%m) is placed inside single quotes in the
        SQL text without escaping, so a message containing a quote character
        will likely break the INSERT. Confirm before enabling this appender.
    -->
    <appender name="DATABASE" class="org.apache.log4j.jdbc.JDBCAppender">
        <param name="URL" value="jdbc:mysql://127.0.0.1:3306/webmagic?characterEncoding=UTF-8"/>
        <param name="driver" value="com.mysql.jdbc.Driver"/>
        <param name="user" value="root"/>
        <param name="password" value="1234"/>
        <param name="sql" value="INSERT INTO log4j(stamp,thread,info_level,class,message,logger) VALUES ('%d{yyyy-MM-dd HH:mm:ss}','%t','%p','%c','%m','%l')"/>
        <!-- <layout class="org.apache.log4j.PatternLayout">
            <param name="ConversionPattern" value="INSERT INTO log4j(stamp,thread,info_level,class,message,logger) VALUES ('%d{yyyy-MM-dd HH:mm:ss}','%t','%.50p','%.50c','%.1000m','%.50l')" />
        </layout>-->
        <!-- Level filter: LevelMin is DEBUG and LevelMax is FATAL, so events from DEBUG through FATAL are written. -->
        <filter class="org.apache.log4j.varia.LevelRangeFilter">
            <param name="LevelMin" value="DEBUG"/>
            <param name="LevelMax" value="FATAL"/>
        </filter>
    </appender>

    <!-- org.apache loggers: WARN and above, sent to the console appender only (additivity is off). -->
    <logger name="org.apache" additivity="false">
        <level value="warn" />
        <appender-ref ref="stdout" />
    </logger>

    <!-- Root logger: INFO and above. The MAIL and DATABASE appenders are currently disabled. -->
    <root>
        <level value="info" />
        <appender-ref ref="stdout" />
        <appender-ref ref="file" />
        <appender-ref ref="file_log" />
        <!-- <appender-ref ref="MAIL" />-->
        <!-- <appender-ref ref="DATABASE" /> -->
    </root>
</log4j:configuration>