<<上篇 | 首页 | 下篇>>

动态切换数据源(spring+hibernate) - liheping - 博客园

思路:确切地说,动态切换数据源适用于同一类型数据库的情况。也就是说,系统使用的数据库分布在多台数据库服务器上,或者是同一台服务器上的多个数据库,在运行期间根据某种标识符动态地选择当前操作的数据库。
     1.数据源是相同类型的数据库: 一个SessionFactory+动态数据源+一个事务管理器
     2.数据源是不同类型的数据库: 根据类型 配置多套SessionFactory
 
模拟:两个mysql数据源+一个Access数据源

实现

1.切换数据源需要标识符,标识符是Object类型
package lhp.example.context;
/**
 * Identifiers used to select one of the switchable data sources.
 *
 * <p>The constant names are the keys under which the target data sources are registered.
 */
public enum DBType {
    dataSource1,
    dataSource2
}

2.然后创建一个用于切换数据源(设置或者获得上下文)的工具类
复制代码
package lhp.example.context;

/**
 * Holds the data source identifier selected for the current thread.
 *
 * <p>The value is stored in a {@link ThreadLocal}, so a selection made on one thread is not
 * visible to any other thread.
 */
public class ContextHolder {
    /** Per-thread selection; typed as {@code DBType} so that reads need no downcast. */
    private static final ThreadLocal<DBType> holder = new ThreadLocal<DBType>();

    /**
     * Selects the data source for the current thread.
     *
     * @param dbType identifier of the data source to use; may be {@code null}
     */
    public static void setDbType(DBType dbType) {
        holder.set(dbType);
    }

    /**
     * Returns the data source selected for the current thread.
     *
     * @return the current identifier, or {@code null} if none has been set on this thread
     */
    public static DBType getDbType() {
        return holder.get();
    }

    /**
     * Removes the current thread's selection, so that a later {@link #getDbType()} on this
     * thread returns {@code null} until a new value is set.
     */
    public static void clearDbType() {
        holder.remove();
    }
}
复制代码

3.创建动态数据源类,继承org.springframework.jdbc.datasource.lookup.AbstractRoutingDataSource这个类.
复制代码
package lhp.example.context;

import java.util.logging.Logger;

import org.springframework.jdbc.datasource.lookup.AbstractRoutingDataSource;

/**
 * Routing data source whose lookup key is the identifier currently held by
 * {@link ContextHolder} for the calling thread.
 */
public class DynamicDataSource extends AbstractRoutingDataSource {
    // Class.getName() gives the plain dotted class name; Class.toString() would prefix it
    // with "class ", producing a logger name outside the normal package hierarchy.
    public static final Logger logger = Logger.getLogger(DynamicDataSource.class.getName());

    /**
     * Supplies the key used to pick the target data source.
     *
     * @return the {@code DBType} set for the current thread, or {@code null} if none is set
     */
    @Override
    protected Object determineCurrentLookupKey() {
        // Identifier of the data source selected for this thread.
        DBType key = ContextHolder.getDbType();
        // To trace routing decisions, log the key here, e.g. logger.info("current data source: " + key);
        return key;
    }

}
复制代码

4.然后配置多个数据源
复制代码
<!-- Data source 1: c3p0 pooled MySQL data source for the "dec" database on 127.0.0.1:3306. -->
<!-- NOTE(review): connects as root with an empty password; presumably for local demo use only. -->
<bean id="dataSource1" class="com.mchange.v2.c3p0.ComboPooledDataSource">
<property name="driverClass" value="com.mysql.jdbc.Driver" />
<property name="jdbcUrl" value="jdbc:mysql://127.0.0.1:3306/dec" />
<property name="user" value="root" />
<property name="password" value="" />
</bean>
<!-- Data source 2: c3p0 pooled MySQL data source for the "lms" database on 127.0.0.1:3306. -->
<!-- NOTE(review): connects as root with an empty password; presumably for local demo use only. -->
<bean id="dataSource2" class="com.mchange.v2.c3p0.ComboPooledDataSource">
<property name="driverClass" value="com.mysql.jdbc.Driver" />
<property name="jdbcUrl" value="jdbc:mysql://127.0.0.1:3306/lms" />
<property name="user" value="root" />
<property name="password" value="" />
</bean>

<!-- Data source 3: c3p0 pooled Access data source reached through the JDBC-ODBC bridge (ODBC name "accessTest"). -->
<!-- NOTE(review): the user name and password are hard-coded below; consider externalising them. -->
<bean id="dataSource3" class="com.mchange.v2.c3p0.ComboPooledDataSource">
<property name="driverClass" value="sun.jdbc.odbc.JdbcOdbcDriver" />
<property name="jdbcUrl" value="jdbc:odbc:accessTest" />
<property name="user" value="administrator" />
<property name="password" value="XLZX0309" />
</bean>

<!-- Dynamic (routing) data source for MySQL: maps each DBType key to one of the MySQL data sources. -->
<bean id="mysqlDynamicDataSource" class="lhp.example.context.DynamicDataSource">
<property name="targetDataSources">
<!-- Type of the lookup keys: the DBType enum; each entry key below is the name of an enum constant. -->
<map key-type="lhp.example.context.DBType">
<entry key="dataSource1" value-ref="dataSource1" />
<entry key="dataSource2" value-ref="dataSource2" />
</map>
</property>
<!-- Data source configured as the default target. -->
<property name="defaultTargetDataSource" ref="dataSource1" />
</bean>
复制代码

5.配置sessionFactory
复制代码
<!-- Hibernate 3 SessionFactory for MySQL, built on the dynamic data source so one factory serves both MySQL databases. -->
<bean id="mysqlSessionFactory" class="org.springframework.orm.hibernate3.LocalSessionFactoryBean">
<property name="dataSource" ref="mysqlDynamicDataSource" />
<property name="hibernateProperties">
<props>
<prop key="hibernate.dialect">org.hibernate.dialect.MySQLDialect</prop>
<prop key="hibernate.show_sql">true</prop>
<prop key="hibernate.hbm2ddl.auto">update</prop><!-- alternative values: create, validate -->
<prop key="hibernate.query.substitutions">true 1, false 0</prop>
</props>
</property>
</bean>

<!-- Hibernate 3 SessionFactory for the Access data source (dataSource3). -->
<!-- NOTE(review): the bean id is spelled "aceess"; the test looks the bean up by this exact id, so rename both together. -->
<bean id="aceessSessionFactory" class="org.springframework.orm.hibernate3.LocalSessionFactoryBean">
<property name="dataSource" ref="dataSource3" />
<property name="hibernateProperties">
<props>
<!-- Access SQL syntax is similar to MSSQL, so the SQL Server dialect is used; a third-party Access dialect could be used instead. -->
<prop key="hibernate.dialect">org.hibernate.dialect.SQLServerDialect</prop>
<prop key="hibernate.jdbc.batch_size">30</prop>
<prop key="hibernate.jdbc.fetch_size">50</prop>
<prop key="hibernate.show_sql">true</prop>
<prop key="hibernate.format_sql">false</prop>
<prop key="hibernate.hbm2ddl.auto">update</prop><!-- alternative values: create, validate -->
<prop key="hibernate.query.substitutions">true 1, false 0</prop>
<prop key="hibernate.cglib.use_reflection_optimizer">true</prop>
<!-- <prop key="hibernate.cache.use_second_level_cache">true</prop> -->
<!-- <prop key="hibernate.cache.provider_class">org.hibernate.cache.EhCacheProvider</prop> -->
<!-- <prop key="hibernate.cache.use_query_cache">true</prop> -->
<!-- <prop key="hibernate.generate_statistics">true</prop> -->
<!-- <prop key="hibernate.cache.provider_configuration_file_resource_path">classpath:ehcache.xml</prop> -->
</props>
</property>
</bean>
复制代码

6.测试用例
复制代码
package lhp.example.junit;

import static org.junit.Assert.*;
import java.sql.DatabaseMetaData;
import lhp.example.context.ContextHolder;
import lhp.example.context.DBType;
import org.hibernate.Session;
import org.hibernate.SessionFactory;
import org.junit.Before;
import org.junit.Test;
import org.springframework.context.ApplicationContext;
import org.springframework.context.support.ClassPathXmlApplicationContext;

/**
 * Integration test checking that each Hibernate {@code SessionFactory} is connected to the
 * expected database, and that the MySQL factory follows the data source selected through
 * {@link ContextHolder}.
 *
 * <p>The expected JDBC URLs mirror the URLs of the configured data sources.
 */
public class ServiceTest {
    private ApplicationContext context;
    // JDBC URLs of the three data sources
    private final String dataSource1_URL = "jdbc:mysql://127.0.0.1:3306/dec";
    private final String dataSource2_URL = "jdbc:mysql://127.0.0.1:3306/lms";
    private final String dataSource3_URL = "jdbc:odbc:accessTest";
    private SessionFactory mysqlSessionFactory;
    private SessionFactory aceessSessionFactory;

    /**
     * Selects {@code dataSource1} and then loads the Spring context and both session factories.
     *
     * @throws Exception if the application context cannot be created
     */
    @Before
    public void setUp() throws Exception {
        // Select a data source before Spring (and therefore Hibernate) is initialised.
        ContextHolder.setDbType(DBType.dataSource1);

        String[] xmlFiles = new String[] {
                "applicationContext-dataSource.xml",
                "applicationContext-hibernate.xml",
                "applicationContext-spring.xml" };
        context = new ClassPathXmlApplicationContext(xmlFiles);

        mysqlSessionFactory = (SessionFactory) context.getBean("mysqlSessionFactory");
        aceessSessionFactory = (SessionFactory) context.getBean("aceessSessionFactory");
    }

    /**
     * Verifies that the MySQL factory connects to {@code dataSource1} first and to
     * {@code dataSource2} after the context is switched.
     *
     * @throws Exception on any Hibernate/JDBC failure, so that the test fails instead of
     *         passing silently
     */
    @Test
    public void mysqlDataSourceTest() throws Exception {
        // setUp() selected dataSource1, so the first session must use its URL.
        assertEquals(dataSource1_URL, currentUrl(mysqlSessionFactory));

        // Switch to dataSource2; a newly opened session must now use the second URL.
        ContextHolder.setDbType(DBType.dataSource2);
        assertEquals(dataSource2_URL, currentUrl(mysqlSessionFactory));
    }

    /**
     * Verifies that the Access factory connects to {@code dataSource3}.
     *
     * @throws Exception on any Hibernate/JDBC failure, so that the test fails instead of
     *         passing silently
     */
    @Test
    public void accessDataSourceTest() throws Exception {
        assertEquals(dataSource3_URL, currentUrl(aceessSessionFactory));
    }

    /**
     * Opens a session on the given factory, reads the JDBC URL from the connection metadata,
     * and closes the session again.
     *
     * @param factory the session factory to probe
     * @return the URL reported by the session's connection metadata
     * @throws Exception if the session cannot be opened or the metadata cannot be read
     */
    @SuppressWarnings("deprecation")
    private static String currentUrl(SessionFactory factory) throws Exception {
        Session session = factory.openSession();
        try {
            DatabaseMetaData metaData = session.connection().getMetaData();
            return metaData.getURL();
        } finally {
            // Always close the session, even when the metadata lookup fails.
            session.close();
        }
    }

}
复制代码

阅读全文……

标签 : ,

Mahout学习笔记-分类算法之Decision Forest - 潘的博客 - 博客园

根据最近的Mahout Wiki,Mahout实现的分类算法有:随机梯度下降(SGD),贝叶斯分类,Online Passive Aggressive,HMM和决策森林(随机森林)。随机梯度下降当前不能并行处理,贝叶斯分类更适合处理文本数据,所以这两个算法都不太适合我的应用场景(并行处理,特征类型为数字),OPA和HMM不太熟悉,所以就选用了决策森林(随机森林)。

  决策森林,顾名思义,就是由多个决策树组成森林,然后用这个森林进行分类,非常适合用MapReduce实现,进行并行处理。决策森林又称为随机森林,这是因为不同于常规的决策树(ID3,C4.5),决策森林中每棵树的每个节点在选择该点的分类特征时并不是从所有的输入特征里选择一个最好的,而是从所有的M个输入特征里随机的选择m个特征,然后从这m个特征里选择一个最好的(这样比较适合那种输入特征数量特别多的应用场景,在输入特征数量不多的情况下,我们可以取m=M)。然后针对目标特征类型的不同,取多个决策树的平均值(目标特征类型为数字类型(numeric))或大多数投票(目标特征类型为类别(category))。

  在Mahout的example中有一个Decision Tree的例子,可以直接在命令行运行:

  1. 准备数据:

  数据为Breiman提供的glass:http://archive.ics.uci.edu/ml/datasets/Glass+Identification

  2. 生成数据的说明文件:

  在Mahout目录下执行:bin/mahout org.apache.mahout.df.tools.Describe -p testdata/glass.data -f testdata/glass.info -d I 9 N L

  数据格式为CSV,最后的I 9 N L说明各特征的属性:

  I表示忽略第一个特征值(该特征值一般用来标示每一条训练样例,亦即可以作为ID)。

  9 N表示接下来的9个特征是输入特征,类型为数字类型。

  L 表示该特征是目标特征,亦即Label。

  以glass文件的前几行为例:

1,1.52101,13.64,4.49,1.10,71.78,0.06,8.75,0.00,0.00,1
2,1.51761,13.89,3.60,1.36,72.73,0.48,7.83,0.00,0.00,1
3,1.51618,13.53,3.55,1.54,72.99,0.39,7.78,0.00,0.00,1
4,1.51766,13.21,3.69,1.29,72.61,0.57,8.22,0.00,0.00,1
5,1.51742,13.27,3.62,1.24,73.08,0.55,8.07,0.00,0.00,1

  第一个特征被忽略,因为这个特征是作为ID用来表示每个样例的,2-10是9个输入特征,用来训练分类器,类型为数字(Numeric),最后一个特征是目标特征,代表每个样例所属的类别,这里所有样例都属于"1"类。

  3. 进行分类和测试

  在Mahout目录下执行:bin/mahout org.apache.mahout.df.BreimanExample -d testdata/glass.data -ds testdata/glass.info -i 10 -t 100

  -i表示迭代的次数

  -t表示每次迭代构建的决策树的棵数

  BreimanExample默认会构造两个森林,一个取m=1,一个取m=log(M+1)。之所以这么做是为了说明即使m值很小,整个森林的分类结果也会挺好。

阅读全文……

推荐引擎:使用Mahout协同过滤

Mahout is a collection of machine learning algorithms intended to perform operations such as recommendation (Collaborative Filtering), Clustering and Classification. To implement recommendation, we first need an input data file in which every line contains one record. Each record should have the user ID, item ID and preference value, in that order, separated by commas.
 
Input File – input.txt
501,1002,5
501,1012,3
510,1002,2
515,1002,5
501,1020,1
                The point to be considered here is that the User ID and Item ID must be integers; alphanumeric characters won’t serve our purpose. Also, the larger the input file, the better the quality of the recommendations produced.
 
Recommenders
                Recommenders are broadly classified into two categories based on the method or approach they use in generating recommendations
1.       User Based Recommendations
                Recommendations are derived from how similar users are to other users, i.e. to make recommendations for a user (User1) we take into account the user or users who share similar tastes, and based on the items they possess we recommend items to User1
 
2.       Item Based Recommendations
                Recommendations are derived from how similar items are to other items, i.e. based on the items a user already has, more similar items are recommended
 
 
                When we make mahout recommendations the key components involved are
Data Model
                It is an encapsulation used by Mahout to hold input data. It helps efficient access to data by various recommender algorithms.
Similarity Algorithm
                There are various kinds of similarity algorithms available, and Mahout has implementations of all the popular ones, such as Pearson Correlation, Cosine Measure, Euclidean Distance, Log Likelihood, Tanimoto coefficient etc.
User Neighborhood
                This is applicable to user based recommendations, which are made based on user-to-user similarity. We form a neighborhood of the most similar users, who share almost the same tastes, so that we get better recommendations. The algorithms that we use to select the user neighborhood are
1.       Nearest N User Neighborhood
                Here we specify the neighborhood size, i.e. the exact number of most similar users to be considered for generating recommendations, say 100, 500 etc.
2.       Threshold User Neighborhood
                We don’t specify the neighborhood size; rather, we specify a similarity threshold, which is a value between -1 and +1. If we specify a value of 0.7, then only the users that share a similarity greater than 0.7 are considered part of the neighborhood. The higher the value, the more similar the users are
Recommender
                It is the final computing object which couples together the datamodel, similarity algorithm and neighborhood to generate recommendations based on the same
 
                Sample code snippets to generate user and item based recommendations are given below
 
User Based Recommender
 
import java.io.File;
import java.io.IOException;
import java.util.List;
 
import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
import org.apache.mahout.cf.taste.impl.neighborhood.NearestNUserNeighborhood;
import org.apache.mahout.cf.taste.impl.recommender.GenericUserBasedRecommender;
import org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity;
import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood;
import org.apache.mahout.cf.taste.recommender.RecommendedItem;
import org.apache.mahout.cf.taste.recommender.Recommender;
import org.apache.mahout.cf.taste.similarity.UserSimilarity;
 
public class UserRecommender {
     
      public static void main(String args[])
      {
            // specifying the user id to which the recommendations have to be generated for
            int userId=510;
           
            //specifying the number of recommendations to be generated
            int noOfRecommendations=5;
           
            try
            {
                  // Data model created to accept the input file
                  FileDataModel dataModel = new FileDataModel(newFile("D://input.txt"));
                 
                  /*Specifies the Similarity algorithm*/
                  UserSimilarity userSimilarity = newPearsonCorrelationSimilarity(dataModel);
                 
                  /*NearestNUserNeighborhood is preferred in situations where we need to have control on the exact no of neighbors*/
                  UserNeighborhood neighborhood =newNearestNUserNeighborhood(100, userSimilarity, dataModel);
                 
                  /*Initalizing the recommender */
                  Recommender recommender =newGenericUserBasedRecommender(dataModel, neighborhood, userSimilarity);
                 
                  //calling the recommend method to generate recommendations
                  List<RecommendedItem> recommendations =recommender.recommend(userId, noOfRecommendations);
           
                  //
                  for (RecommendedItem recommendedItem : recommendations)
                        System.out.println(recommendedItem.getItemID());
            }
            catch (IOException e) {
                  // TODO Auto-generated catch block
                  e.printStackTrace();
            } catch (TasteException e) {
                  // TODO Auto-generated catch block
                  e.printStackTrace();
            }
           
                 
      }
 
}
 
 
Item Based Recommender
 
import java.io.File;
import java.io.IOException;
import java.util.List;
 
import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
import org.apache.mahout.cf.taste.impl.recommender.GenericItemBasedRecommender;
import org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity;
import org.apache.mahout.cf.taste.recommender.ItemBasedRecommender;
import org.apache.mahout.cf.taste.recommender.RecommendedItem;
import org.apache.mahout.cf.taste.similarity.ItemSimilarity;
 
public class ItemRecommender {
     
      public static void main(String args[])
      {
            // specifying the user id to which the recommendations have to be generated for
            int userId=510;
           
            //specifying the number of recommendations to be generated
            int noOfRecommendations=5;
           
            try
            {
                  // Data model created to accept the input file
                  FileDataModel dataModel = new FileDataModel(newFile("D://input.txt"));
                 
                  /*Specifies the Similarity algorithm*/
                  ItemSimilarity itemSimilarity = newPearsonCorrelationSimilarity(dataModel);
                 
                  /*Initalizing the recommender */
                  ItemBasedRecommender recommender =newGenericItemBasedRecommender(dataModel, itemSimilarity);
                 
                  //calling the recommend method to generate recommendations
                  List<RecommendedItem> recommendations =recommender.recommend(userId, noOfRecommendations);
           
                  //
                  for (RecommendedItem recommendedItem : recommendations)
                        System.out.println(recommendedItem.getItemID());
            }
            catch (IOException e) {
                  // TODO Auto-generated catch block
                  e.printStackTrace();
            } catch (TasteException e) {
                  // TODO Auto-generated catch block
                  e.printStackTrace();
            }
           
      }
}
 
Note: To get some recommendations you need a sufficiently large input file. A few lines of input won’t yield any recommendations

 

参考:基于 Apache Mahout 构建社会化推荐引擎:http://www.ibm.com/developerworks/cn/java/j-lo-mahout/