动态切换数据源(spring+hibernate) - liheping - 博客园

思路：动态切换数据源确切的来说是在同一类型数据库的情况下的。意思就是说 , 在系统中的使用的数据库分布在多台数据库服务器或者在同台服务器上的多个数据库. 在运行时期间根据某种标识符来动态的选择当前操作的数据库.

1.数据源是相同类型的数据库: 一个SessionFactory+动态数据源+一个事务管理器

2.数据源是不同类型的数据库: 根据类型配置多套SessionFactory

模拟：两个mysql数据源+一个Access数据源

实现：

1.切换数据源需要标识符,标识符是Object类型

package lhp.example.context;
/**
 * Lookup keys for the switchable MySQL data sources.
 *
 * <p>The constant names are deliberately identical to the entry keys of the
 * {@code targetDataSources} map in the Spring configuration
 * ({@code dataSource1}, {@code dataSource2}), so they must not be renamed
 * without changing that configuration as well.
 */
public enum DBType {
    dataSource1,
    dataSource2
}

2.然后创建一个用于切换数据源(设置或者获得上下文)的工具类

package lhp.example.context;

/**
 * Per-thread holder for the {@link DBType} that identifies the data source
 * the current thread wants to use.
 *
 * <p>The value is stored in a {@link ThreadLocal}, so a key set by one thread
 * is not visible to any other thread.
 */
public class ContextHolder {
    // Typed as DBType (rather than Object) so reads need no downcast.
    private static final ThreadLocal<DBType> holder = new ThreadLocal<DBType>();

    /**
     * Sets the data source key for the current thread.
     *
     * @param dbType the key to use; replaces any key previously set on this thread
     */
    public static void setDbType(DBType dbType) {
        holder.set(dbType);
    }

    /**
     * Returns the data source key of the current thread.
     *
     * @return the key last set on this thread, or {@code null} if none has
     *         been set (or it has been cleared)
     */
    public static DBType getDbType() {
        return holder.get();
    }

    /**
     * Removes the current thread's data source key.
     */
    public static void clearDbType() {
        holder.remove();
    }
}

3.创建动态数据源类,继承org.springframework.jdbc.datasource.lookup.AbstractRoutingDataSource这个类.

package lhp.example.context;

import java.util.logging.Logger;

import org.springframework.jdbc.datasource.lookup.AbstractRoutingDataSource;

/**
 * Routing data source whose lookup key is the {@link DBType} currently stored
 * in {@link ContextHolder} for the calling thread.
 */
public class DynamicDataSource extends AbstractRoutingDataSource {
    // Class.getName() gives the plain class name; Class.toString() would
    // prefix it with "class " and produce an unconventional logger name.
    public static final Logger logger = Logger.getLogger(DynamicDataSource.class.getName());

    /**
     * Supplies the key used to pick the target data source.
     *
     * @return the calling thread's {@link DBType}, or {@code null} when the
     *         thread has not selected one
     */
    @Override
    protected Object determineCurrentLookupKey() {
        return ContextHolder.getDbType();
    }

}

4.然后配置多个数据源

<!-- Data source 1: MySQL, database "dec" on 127.0.0.1:3306 (c3p0 connection pool) -->
    <bean id="dataSource1" class="com.mchange.v2.c3p0.ComboPooledDataSource">
        <property name="driverClass" value="com.mysql.jdbc.Driver" />
        <property name="jdbcUrl" value="jdbc:mysql://127.0.0.1:3306/dec" />
        <property name="user" value="root" />
        <property name="password" value="" />
    </bean>
    <!-- Data source 2: MySQL, database "lms" on the same server (c3p0 connection pool) -->
    <bean id="dataSource2" class="com.mchange.v2.c3p0.ComboPooledDataSource">
        <property name="driverClass" value="com.mysql.jdbc.Driver" />
        <property name="jdbcUrl" value="jdbc:mysql://127.0.0.1:3306/lms" />
        <property name="user" value="root" />
        <property name="password" value="" />
    </bean>

    <!-- Data source 3: Access, reached through the JDBC-ODBC driver (ODBC name "accessTest").
         It is a standalone bean and is NOT one of the routing targets below. -->
    <!-- NOTE(review): credentials are stored in plain text here; consider externalising them. -->
    <bean id="dataSource3" class="com.mchange.v2.c3p0.ComboPooledDataSource">
        <property name="driverClass" value="sun.jdbc.odbc.JdbcOdbcDriver" />
        <property name="jdbcUrl" value="jdbc:odbc:accessTest" />
        <property name="user" value="administrator" />
        <property name="password" value="XLZX0309" />
    </bean>

    <!-- Dynamic (routing) data source for the two MySQL databases -->
    <bean id="mysqlDynamicDataSource" class="lhp.example.context.DynamicDataSource">
        <property name="targetDataSources">
            <!-- Type of the lookup key: entry keys below are the DBType constant names -->
            <map key-type="lhp.example.context.DBType">
                <entry key="dataSource1" value-ref="dataSource1" />
                <entry key="dataSource2" value-ref="dataSource2" />
            </map>
        </property>
        <!-- Target configured as the default: dataSource1 -->
        <property name="defaultTargetDataSource" ref="dataSource1" />
    </bean>

5.配置sessionFactory

<!-- MySQL session factory: a single factory backed by the routing data source
     (mysqlDynamicDataSource), so it serves both MySQL databases -->
    <bean id="mysqlSessionFactory" class="org.springframework.orm.hibernate3.LocalSessionFactoryBean">
        <property name="dataSource" ref="mysqlDynamicDataSource" />
        <property name="hibernateProperties">
            <props>
                <prop key="hibernate.dialect">org.hibernate.dialect.MySQLDialect</prop>
                <prop key="hibernate.show_sql">true</prop>
                <prop key="hibernate.hbm2ddl.auto">update</prop><!-- alternative values: create, validate -->
                <prop key="hibernate.query.substitutions">true 1, false 0</prop>
            </props>
        </property>
    </bean>
    
    <!-- Access session factory: a separate factory because the database type differs.
         The bean id is spelled "aceessSessionFactory" (sic); the test case looks it up
         under exactly this name. -->
    <bean id="aceessSessionFactory" class="org.springframework.orm.hibernate3.LocalSessionFactoryBean">
        <property name="dataSource" ref="dataSource3" />
        <property name="hibernateProperties">
            <props>
                <!-- Access SQL syntax is close to MS SQL Server's, so the SQL Server dialect is used;
                     a third-party Access dialect could be used instead -->
                <prop key="hibernate.dialect">org.hibernate.dialect.SQLServerDialect</prop>
                <prop key="hibernate.jdbc.batch_size">30</prop>
                <prop key="hibernate.jdbc.fetch_size">50</prop>
                <prop key="hibernate.show_sql">true</prop>
                <prop key="hibernate.format_sql">false</prop>
                <prop key="hibernate.hbm2ddl.auto">update</prop><!-- alternative values: create, validate -->
                <prop key="hibernate.query.substitutions">true 1, false 0</prop>
                <prop key="hibernate.cglib.use_reflection_optimizer">true</prop>
                <!-- Second-level / query cache settings, currently disabled: -->
                <!-- <prop key="hibernate.cache.use_second_level_cache">true</prop> -->
                <!-- <prop key="hibernate.cache.provider_class">org.hibernate.cache.EhCacheProvider</prop> -->
                <!-- <prop key="hibernate.cache.use_query_cache">true</prop> -->
                <!-- <prop key="hibernate.generate_statistics">true</prop> -->
                <!-- <prop key="hibernate.cache.provider_configuration_file_resource_path">classpath:ehcache.xml</prop> -->
            </props>
        </property>
    </bean>

6.测试用例

package lhp.example.junit;

import static org.junit.Assert.*;

import java.sql.DatabaseMetaData;
import lhp.example.context.ContextHolder;
import lhp.example.context.DBType;
import org.hibernate.Session;
import org.hibernate.SessionFactory;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import org.springframework.context.ApplicationContext;
import org.springframework.context.support.ClassPathXmlApplicationContext;

/**
 * Integration test for data source switching.
 *
 * <p>Checks that sessions opened from {@code mysqlSessionFactory} connect to
 * the MySQL database selected through {@link ContextHolder}, and that
 * {@code aceessSessionFactory} connects to the Access data source.
 * Requires the databases referenced by the Spring configuration to be reachable.
 */
public class ServiceTest {
    // JDBC URLs of the three data sources; they must match the Spring configuration.
    private static final String DATA_SOURCE1_URL = "jdbc:mysql://127.0.0.1:3306/dec";
    private static final String DATA_SOURCE2_URL = "jdbc:mysql://127.0.0.1:3306/lms";
    private static final String DATA_SOURCE3_URL = "jdbc:odbc:accessTest";

    // Held by its concrete type so it can be closed in tearDown().
    private ClassPathXmlApplicationContext context;
    private SessionFactory mysqlSessionFactory;
    private SessionFactory aceessSessionFactory;

    /**
     * Selects {@code dataSource1} and then starts the Spring context, so the
     * key is already in place while the context is being initialised.
     */
    @Before
    public void setUp() throws Exception {
        ContextHolder.setDbType(DBType.dataSource1);

        String[] xmlFiles = new String[] {
                "applicationContext-dataSource.xml",
                "applicationContext-hibernate.xml",
                "applicationContext-spring.xml" };
        context = new ClassPathXmlApplicationContext(xmlFiles);

        mysqlSessionFactory = (SessionFactory) context.getBean("mysqlSessionFactory");
        aceessSessionFactory = (SessionFactory) context.getBean("aceessSessionFactory");
    }

    /**
     * Clears the thread-local key and closes the context created in
     * {@link #setUp()}, so that neither leaks into the next test.
     */
    @After
    public void tearDown() {
        ContextHolder.clearDbType();
        // context is still null if setUp() failed before creating it
        if (context != null) {
            context.close();
        }
    }

    /**
     * Verifies that the MySQL session factory follows the key in
     * {@link ContextHolder}: first {@code dataSource1}, then {@code dataSource2}.
     *
     * @throws Exception on any connection error, which fails the test instead
     *         of being printed and ignored
     */
    @Test
    public void mysqlDataSourceTest() throws Exception {
        // setUp() selected dataSource1
        assertEquals(DATA_SOURCE1_URL, currentUrl(mysqlSessionFactory));

        // switch to dataSource2; the next session opened must use it
        ContextHolder.setDbType(DBType.dataSource2);
        assertEquals(DATA_SOURCE2_URL, currentUrl(mysqlSessionFactory));
    }

    /**
     * Verifies that the Access session factory connects to {@code dataSource3}.
     *
     * @throws Exception on any connection error, which fails the test
     */
    @Test
    public void accessDataSourceTest() throws Exception {
        assertEquals(DATA_SOURCE3_URL, currentUrl(aceessSessionFactory));
    }

    /**
     * Opens a session on the given factory, reads the JDBC URL from the
     * connection metadata, and always closes the session again.
     */
    @SuppressWarnings("deprecation")
    private static String currentUrl(SessionFactory factory) throws Exception {
        Session session = factory.openSession();
        try {
            DatabaseMetaData metaData = session.connection().getMetaData();
            return metaData.getURL();
        } finally {
            session.close();
        }
    }

}

阅读全文……

标签 : hibernate, spring

发表评论

IT瘾于2013年8月27日下午02时32分00秒发布 #

Mahout学习笔记-分类算法之Decision Forest - 潘的博客 - 博客园

根据最近的Mahout Wiki，Mahout实现的分类算法有：随机梯度下降（SGD），贝叶斯分类，Online Passive Aggressive，HMM和决策森林（随机森林）。随机梯度下降当前不能并行处理，贝叶斯分类更适合处理文本数据，所以这两个算法都不太适合我的应用场景（并行处理，特征类型为数字），OPA和HMM不太熟悉，所以就选用了决策森林（随机森林）。

决策森林，顾名思义，就是由多个决策树组成森林，然后用这个森林进行分类，非常适合用MapReduce实现，进行并行处理。决策森林又称为随机森林，这是因为不同于常规的决策树（ID3，C4.5），决策森林中每棵树的每个节点在选择该点的分类特征时并不是从所有的输入特征里选择一个最好的，而是从所有的M个输入特征里随机地选择m个特征，然后从这m个特征里选择一个最好的（这样比较适合那种输入特征数量特别多的应用场景，在输入特征数量不多的情况下，我们可以取m=M）。然后针对目标特征类型的不同，取多个决策树的平均值（目标特征类型为数字类型（numeric））或大多数投票（目标特征类型为类别（category））。

在Mahout的example中有一个Decision Tree的例子，可以直接在命令行运行：

1. 准备数据：

数据为Breiman提供的glass：http://archive.ics.uci.edu/ml/datasets/Glass+Identification。

2. 生成数据的说明文件：

在Mahout目录下执行：bin/mahout org.apache.mahout.df.tools.Describe -p testdata/glass.data -f testdata/glass.info -d I 9 N L

数据格式为CSV，最后的I 9 N L说明各特征的属性：

I表示忽略第一个特征值（该特征值一般用来标示每一条训练样例，亦即可以作为ID）。

9 N表示接下来的9个特征是输入特征，类型为数字类型。

L 表示该特征是目标特征，亦即Label。

以glass文件的前几行为例：

1,1.52101,13.64,4.49,1.10,71.78,0.06,8.75,0.00,0.00,1
2,1.51761,13.89,3.60,1.36,72.73,0.48,7.83,0.00,0.00,1
3,1.51618,13.53,3.55,1.54,72.99,0.39,7.78,0.00,0.00,1
4,1.51766,13.21,3.69,1.29,72.61,0.57,8.22,0.00,0.00,1
5,1.51742,13.27,3.62,1.24,73.08,0.55,8.07,0.00,0.00,1

第一个特征被忽略，因为这个特征是作为ID用来表示每个样例的，2-10是9个输入特征，用来训练分类器，类型为数字（Numeric），最后一个特征是目标特征，代表每个样例所属的类别，这里所有样例都属于"1"类。

3. 进行分类和测试

在Mahout目录下执行：bin/mahout org.apache.mahout.df.BreimanExample -d testdata/glass.data -ds testdata/glass.info -i 10 -t 100

-i表示迭代的次数

-t表示每次迭代中构建的决策树的棵数

BreimanExample默认会构造两个森林，一个取m=1，一个取m=log2(M)+1（向下取整）。之所以这么做是为了说明即使m值很小，整个森林的分类结果也会挺好。

阅读全文……

标签 : mahout, tech, 数据挖掘

发表评论

IT瘾于2013年8月19日下午03时06分00秒发布 #

推荐引擎：使用Mahout协同过滤

Mahout is a collection of machine learning algorithms for tasks such as recommendation (collaborative filtering), clustering and classification. To implement recommendation we first need an input data file in which every line contains exactly one record. Each record should contain the user ID, the item ID and the preference value, in that order, separated by commas.

Input File – input.txt

501,1002,5

501,1012,3

510,1002,2

515,1002,5

501,1020,1

…

Note that the user ID and item ID must be integers; alphanumeric values won’t serve our purpose. Also, the larger the input file, the better the quality of the recommendations produced.

Recommenders

Recommenders are broadly classified into two categories based on the method or approach they use in generating recommendations

1. User Based Recommendations

Recommendations are derived from how similar users are to other users, i.e. to make recommendations for a user (User1) we take into account a user or users who share similar tastes, and based on the items they possess we recommend items to User1

2. Item Based Recommendations

Recommendations are derived from how similar items are to other items, i.e. based on the items a user already has, more similar items are recommended

When we make mahout recommendations the key components involved are

Data Model

It is an encapsulation used by Mahout to hold input data. It helps efficient access to data by various recommender algorithms.

Similarity Algorithm

There are various kinds of similarity algorithms available, and Mahout has implementations of all the popular ones, such as Pearson Correlation, Cosine Measure, Euclidean Distance, Log Likelihood, Tanimoto coefficient, etc.

User Neighborhood

This is applicable to user based recommendations, which are made based on user-to-user similarity. We form a neighborhood of the most similar users, who share almost the same tastes, so that we get better recommendations. The algorithms that we use to select the user neighborhood are

1. Nearest N User Neighborhood

Here we specify the neighborhood size, i.e. exactly the number of most similar users to be considered for generating recommendations, say 100, 500, etc.

2. Threshold User Neighborhood

We don’t specify the neighborhood size; rather, we specify a similarity threshold, which is a value between -1 and +1. If we specify a value of 0.7, then only the users that share a similarity greater than 0.7 are considered part of the neighborhood. The higher the value, the more similar the users are

Recommender

It is the final computing object which couples together the datamodel, similarity algorithm and neighborhood to generate recommendations based on the same

Sample code snippets to generate user and item based recommendations are given below

User Based Recommender

import java.io.File;

import java.io.IOException;

import java.util.List;

import org.apache.mahout.cf.taste.common.TasteException;

import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;

import org.apache.mahout.cf.taste.impl.neighborhood.NearestNUserNeighborhood;

import org.apache.mahout.cf.taste.impl.recommender.GenericUserBasedRecommender;

import org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity;

import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood;

import org.apache.mahout.cf.taste.recommender.RecommendedItem;

import org.apache.mahout.cf.taste.recommender.Recommender;

import org.apache.mahout.cf.taste.similarity.UserSimilarity;

public class UserRecommender {

public static void main(String args[])

{

// specifying the user id to which the recommendations have to be generated for

int userId=510;

//specifying the number of recommendations to be generated

int noOfRecommendations=5;

try

{

// Data model created to accept the input file

FileDataModel dataModel = new FileDataModel(newFile("D://input.txt"));

/*Specifies the Similarity algorithm*/

UserSimilarity userSimilarity = newPearsonCorrelationSimilarity(dataModel);

/*NearestNUserNeighborhood is preferred in situations where we need to have control on the exact no of neighbors*/

UserNeighborhood neighborhood =newNearestNUserNeighborhood(100, userSimilarity, dataModel);

/*Initalizing the recommender */

Recommender recommender =newGenericUserBasedRecommender(dataModel, neighborhood, userSimilarity);

//calling the recommend method to generate recommendations

List<RecommendedItem> recommendations =recommender.recommend(userId, noOfRecommendations);

for (RecommendedItem recommendedItem : recommendations)

System.out.println(recommendedItem.getItemID());

}

catch (IOException e) {

// TODO Auto-generated catch block

e.printStackTrace();

} catch (TasteException e) {

// TODO Auto-generated catch block

e.printStackTrace();

}

Item Based Recommender

import java.io.File;

import java.io.IOException;

import java.util.List;

import org.apache.mahout.cf.taste.common.TasteException;

import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;

import org.apache.mahout.cf.taste.impl.recommender.GenericItemBasedRecommender;

import org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity;

import org.apache.mahout.cf.taste.recommender.ItemBasedRecommender;

import org.apache.mahout.cf.taste.recommender.RecommendedItem;

import org.apache.mahout.cf.taste.similarity.ItemSimilarity;

public class ItemRecommender {

public static void main(String args[])

{

// specifying the user id to which the recommendations have to be generated for

int userId=510;

//specifying the number of recommendations to be generated

int noOfRecommendations=5;

try

{

// Data model created to accept the input file

FileDataModel dataModel = new FileDataModel(newFile("D://input.txt"));

/*Specifies the Similarity algorithm*/

ItemSimilarity itemSimilarity = newPearsonCorrelationSimilarity(dataModel);

/*Initalizing the recommender */

ItemBasedRecommender recommender =newGenericItemBasedRecommender(dataModel, itemSimilarity);

//calling the recommend method to generate recommendations

List<RecommendedItem> recommendations =recommender.recommend(userId, noOfRecommendations);

for (RecommendedItem recommendedItem : recommendations)

System.out.println(recommendedItem.getItemID());

}

catch (IOException e) {

// TODO Auto-generated catch block

e.printStackTrace();

} catch (TasteException e) {

// TODO Auto-generated catch block

e.printStackTrace();

}

Note: To get some recommendations you need a sufficiently large input file. A few lines of input won’t yield any recommendations

参考：基于 Apache Mahout 构建社会化推荐引擎：http://www.ibm.com/developerworks/cn/java/j-lo-mahout/

标签 : mahout, tech, 数据挖掘

发表评论

IT瘾于2013年8月19日下午02时56分00秒发布 #