使用Mahout基于用户和物品相似度进行协同过滤推荐内容
基于用户(User)的协同过滤:
package mahout;
import java.io.File;
import java.io.IOException;
import java.util.List;
import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
import org.apache.mahout.cf.taste.impl.neighborhood.NearestNUserNeighborhood;
import org.apache.mahout.cf.taste.impl.recommender.CachingRecommender;
import org.apache.mahout.cf.taste.impl.recommender.GenericUserBasedRecommender;
import org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.model.JDBCDataModel;
import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood;
import org.apache.mahout.cf.taste.recommender.RecommendedItem;
import org.apache.mahout.cf.taste.recommender.Recommender;
import org.apache.mahout.cf.taste.similarity.UserSimilarity;
/**
 * User-based collaborative filtering (CF) recommendation with Mahout.
 *
 * <p>Users rate items. When there are no ratings but only like/dislike signals, a like can be
 * stored as a score of 5 and a dislike as a score of 1.
 *
 * <p>Usage: {@code java mahout.Mahout [preferenceFile]}. When no file is given, the default
 * {@code D:/intro.csv} is used.
 *
 * @author itindex
 */
public class Mahout {

    /** Preference file read when no path is passed on the command line. */
    private static final String DEFAULT_DATA_FILE = "D:/intro.csv";

    /** Number of users passed to the nearest-N neighborhood. */
    private static final int NEIGHBORHOOD_SIZE = 2;

    /** ID of the user to produce recommendations for. */
    private static final long TARGET_USER_ID = 1;

    /** Number of recommendations requested. */
    private static final int RECOMMENDATION_COUNT = 2;

    /**
     * Builds a user-based recommender from the preference file and prints the recommendations
     * for user {@value #TARGET_USER_ID}, one per line.
     *
     * @param args optional; {@code args[0]} is the path of the preference file. A null or empty
     *     array selects {@link #DEFAULT_DATA_FILE}.
     * @throws TasteException propagated from the Mahout similarity/recommender calls
     * @throws IOException propagated from opening the preference file
     */
    public static void main(String[] args) throws TasteException, IOException {
        // Alternative data source, kept for reference: read preferences from MySQL instead of a file.
        // (1) Configure the database connection.
        // MysqlDataSource dataSource = new MysqlDataSource();
        // dataSource.setServerName("localhost");
        // dataSource.setUser("admin");
        // dataSource.setPassword("admin");
        // dataSource.setDatabaseName("mahoutDB");
        // (2) Read the preference table through MySQLJDBCDataModel.
        // JDBCDataModel dataModel = new MySQLJDBCDataModel(dataSource, "table1", "userId", "itemId", "preference", "date");
        // (3) Use the JDBC model wherever a DataModel is expected.
        // DataModel model = dataModel;

        // Fall back to the historical hard-coded path so existing invocations keep working.
        String dataFile = (args != null && args.length > 0) ? args[0] : DEFAULT_DATA_FILE;
        DataModel model = new FileDataModel(new File(dataFile));

        // User-to-user similarity measure.
        UserSimilarity similarity = new PearsonCorrelationSimilarity(model);

        // Neighborhood of the users nearest to the target user under that similarity.
        UserNeighborhood neighborhood =
                new NearestNUserNeighborhood(NEIGHBORHOOD_SIZE, similarity, model);

        // A user-based recommender needs the data model, the neighborhood and the similarity;
        // it is wrapped in a CachingRecommender as in the original example.
        Recommender recommender =
                new CachingRecommender(new GenericUserBasedRecommender(model, neighborhood, similarity));

        // Request recommendations for the target user and print each one.
        List<RecommendedItem> recommendations =
                recommender.recommend(TARGET_USER_ID, RECOMMENDATION_COUNT);
        for (RecommendedItem recommendation : recommendations) {
            System.out.println(recommendation);
        }
    }
}
基于物品(Item)的协同过滤:
package mahout;
import java.io.File;
import java.util.Collection;
import java.util.List;
import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
import org.apache.mahout.cf.taste.impl.recommender.CachingRecommender;
import org.apache.mahout.cf.taste.impl.recommender.GenericItemBasedRecommender;
import org.apache.mahout.cf.taste.impl.similarity.EuclideanDistanceSimilarity;
import org.apache.mahout.cf.taste.impl.similarity.GenericItemSimilarity;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.recommender.RecommendedItem;
import org.apache.mahout.cf.taste.recommender.Recommender;
import org.apache.mahout.cf.taste.similarity.ItemSimilarity;
/**
 * Item-based collaborative filtering recommendation with Mahout.
 *
 * <p>Users rate items. When there are no ratings but only like/dislike signals, a like can be
 * stored as a score of 5 and a dislike as a score of 1.
 *
 * <p>Usage: {@code java mahout.RecommenderIntro [preferenceFile]}. When no file is given, the
 * default {@code D:/intro.csv} is used.
 *
 * @author itindex
 */
public class RecommenderIntro {

    /** Preference file read when no path is passed on the command line. */
    private static final String DEFAULT_DATA_FILE = "D:/intro.csv";

    /** ID of the user to produce recommendations for. */
    private static final long TARGET_USER_ID = 1;

    /** Number of recommendations requested. */
    private static final int RECOMMENDATION_COUNT = 2;

    /**
     * Builds an item-based recommender from the preference file and, for each recommendation
     * made to user {@value #TARGET_USER_ID}, prints the recommendation, its item ID and its value.
     *
     * @param args optional; {@code args[0]} is the path of the preference file. A null or empty
     *     array selects {@link #DEFAULT_DATA_FILE}.
     * @throws Exception propagated from loading the file or from the Mahout calls
     */
    public static void main(String[] args) throws Exception {
        // Fall back to the historical hard-coded path so existing invocations keep working.
        String dataFile = (args != null && args.length > 0) ? args[0] : DEFAULT_DATA_FILE;

        // Load the preference data (users, items, preferences) that all later steps read from.
        DataModel model = new FileDataModel(new File(dataFile));

        // Item-to-item similarity: a Euclidean-distance measure wrapped in a GenericItemSimilarity
        // built over the same model.
        ItemSimilarity euclideanSimilarity = new EuclideanDistanceSimilarity(model);
        GenericItemSimilarity itemSimilarity = new GenericItemSimilarity(euclideanSimilarity, model);

        // Item-based recommender, wrapped in a CachingRecommender as in the original example.
        Recommender recommender =
                new CachingRecommender(new GenericItemBasedRecommender(model, itemSimilarity));

        // Request RECOMMENDATION_COUNT (2) recommendations for the target user.
        List<RecommendedItem> recommendedItems =
                recommender.recommend(TARGET_USER_ID, RECOMMENDATION_COUNT);

        // Print each recommendation, followed by its item ID and its value.
        for (RecommendedItem item : recommendedItems) {
            System.out.println(item);
            System.out.println(item.getItemID());
            System.out.println(item.getValue());
        }
    }
}
intro.csv文件格式(每行一条记录,字段依次为):
用户id,物品id,评分
1,10,5
1,12,5
1,14,1
2,12,5
3,10,5
3,12,5
3,13,5
3,14,1
4,10,5