Tair: 淘宝的key/value解决方案
- duxin - 若海的blog今天我们对外开源了Tair,Tair是由淘宝开发的key/value解决方案,你可以在这里获取更多信息. Tair在淘宝有着大规模的应用,在你登录淘宝、查看商品详情页面、在淘江湖和好友“捣浆糊”等等时候,后面都在直接或间接的和Tair交互. Tair是一个分布式的key/value结构数据的解决方案,系统默认支持基于内存和文件的存储引擎,对应于通常我们所说的缓存和持久化存储.
[email protected]原创,转载请注明出处: http://blog.csdn.net/gdutliuyun827
Key-Value缓存有很多,用的较多的是memcache、redis,他们都是以独立服务的形式运行,在工作中有时需要嵌入一个本地的key-value缓存,当然已经有LevelDb等,但感觉还是太重量级了。
本文实现了一种超级轻量的缓存,
1、实现代码仅仅需要400行;
2、性能高效,value长度在1K时测试速度在每秒200万左右
3、缓存是映射到文件中的,所以没有malloc、free的开销,以及带来的内存泄露、内存碎片等;
4、如果服务挂掉了,重启后缓存内容继续存在;
5、如果把缓存映射到磁盘文件就算机器挂了,缓存中内容还是会存在,当然有可能会出现数据损坏的情况;
6、一定程度上实现了LRU淘汰算法,实现的LRU不是全局的只是一条链上的,所以只能说在一定程序上实现了;
7、稳定,已经在多个项目中运用,线上部署的机器有几十台,运行了大半年了没出过问题;
8、普通的缓存key、value都是字符串的形式,此缓存的key、value都可以是class、struct对象结构使用更方便;
老规矩直接上代码:
template<typename K, typename V> class HashTable { public: HashTable(const char *tablename, uint32_t tableLen, uint32_t nodeTotal); virtual ~HashTable(); bool Add(K &key, V &value) { AutoLock autoLock(m_MutexLock); //check is exist uint32_t nodeId = GetIdByKey(key); if(nodeId != m_InvalidId) return false; nodeId = GetFreeNode(); if(nodeId == m_InvalidId) return false; uint32_t hashCode = key.HashCode(); Entry *tmpNode = m_EntryAddr + nodeId; tmpNode->m_Key = key; tmpNode->m_Code = hashCode; tmpNode->m_Value = value; uint32_t index = hashCode % m_HeadAddr->m_TableLen; AddNodeToHead(index, nodeId); return true; } bool Del(K &key) { AutoLock autoLock(m_MutexLock); uint32_t nodeId = GetIdByKey(key); if(nodeId == m_InvalidId) return false; uint32_t index = key.HashCode() % m_HeadAddr->m_TableLen; return RecycleNode(index, nodeId); } bool Set(K &key, V &value) { AutoLock autoLock(m_MutexLock); uint32_t nodeId = GetIdByKey(key); if(nodeId == m_InvalidId) return false; (m_EntryAddr + nodeId)->m_Value = value; return true; } bool Get(K &key, V &value) { AutoLock autoLock(m_MutexLock); uint32_t nodeId = GetIdByKey(key); if(nodeId == m_InvalidId) return false; value = (m_EntryAddr + nodeId)->m_Value; return true; } bool Exist(K &key) { AutoLock autoLock(m_MutexLock); uint32_t nodeId = GetIdByKey(key); if(nodeId == m_InvalidId) return false; return true; } uint32_t Count() { AutoLock autoLock(m_MutexLock); return m_HeadAddr->m_UsedCount; } //if exist set else add bool Replace(K &key, V &value) { AutoLock autoLock(m_MutexLock); if(Exist(key)) return Set(key, value); else return Add(key, value); } /*********************************************** ****LRU: when visit a node, move it to head **** ************************************************/ //if no empty place,recycle tail bool LruAdd(K &key, V &value, K &recyKey, V &recyValue, bool &recycled) { AutoLock autoLock(m_MutexLock); if(Exist(key)) return false; if(Add(key, value)) return true; uint32_t index = key.HashCode() % m_HeadAddr->m_TableLen; uint32_t tailId = GetTailNodeId(index); if(tailId == m_InvalidId) return false; Entry *tmpNode = m_EntryAddr + tailId; recyKey = tmpNode->m_Key; recyValue = tmpNode->m_Value; recycled = true; RecycleNode(index, tailId); return Add(key, value); } bool LruSet(K &key, V &value) { AutoLock autoLock(m_MutexLock); if(Set(key, value)) return MoveToHead(key); else return false; } bool LruGet(K &key, V &value) { AutoLock autoLock(m_MutexLock); if(Get(key, value)) return MoveToHead(key); else return false; } //if exist set else add; if add failed recycle tail than add bool LruReplace(K &key, V &value, K &recyKey, V &recyValue, bool &recycled) { AutoLock autoLock(m_MutexLock); recycled = false; if(Exist(key)) return LruSet(key, value); else return LruAdd(key, value, recyKey, recyValue, recycled); } void Clear() { AutoLock autoLock(m_MutexLock); m_HeadAddr->m_FreeBase = 0; m_HeadAddr->m_RecycleHead = 0; m_HeadAddr->m_UsedCount = 0; for(uint32_t i = 0; i < m_HeadAddr->m_TableLen; ++i) { (m_ArrayAddr+i)->m_Head = m_InvalidId; (m_ArrayAddr+i)->m_Tail = m_InvalidId; } } int GetRowKeys(vector<K> &keys, uint32_t index) { AutoLock autoLock(m_MutexLock); if(index >= m_HeadAddr->m_TableLen) return -1; keys.clear(); keys.reserve(16); int count = 0; Array *tmpArray = m_ArrayAddr + index; uint32_t nodeId = tmpArray->m_Head; while(nodeId != m_InvalidId) { Entry *tmpNode = m_EntryAddr + nodeId; keys.push_back(tmpNode->m_Key); nodeId = tmpNode->m_Next; ++count; } return count; } void *Padding(uint32_t size) { AutoLock autoLock(m_MutexLock); if(size > m_HeadSize - sizeof(TableHead)) return NULL; else return m_HeadAddr->m_Padding; } private: static const uint32_t m_InvalidId = 0xffffffff; static const uint32_t m_HeadSize = 1024; struct TableHead { uint32_t m_TableLen; uint32_t m_NodeTotal; uint32_t m_FreeBase; uint32_t m_RecycleHead; uint32_t m_UsedCount; char m_TableName[256]; uint32_t m_Padding[0]; }; struct Array { uint32_t m_Head; uint32_t m_Tail; }; struct Entry { V m_Value; K m_Key; uint32_t m_Code; uint32_t m_Next; uint32_t m_Prev; }; size_t m_MemSize; uint8_t *m_MemAddr; TableHead *m_HeadAddr; Array *m_ArrayAddr; Entry *m_EntryAddr; ThreadMutex m_MutexLock; bool MoveToHead(K &key); uint32_t GetIdByKey(K &key); void AddNodeToHead(uint32_t index, uint32_t nodeId); bool MoveNodeToHead(uint32_t index, uint32_t nodeId); bool RecycleNode(uint32_t index, uint32_t nodeId); uint32_t GetTailNodeId(uint32_t index); uint32_t GetFreeNode(); DISABLE_COPY_AND_ASSIGN(HashTable); }; template<typename K, typename V> HashTable<K, V>::HashTable(const char *tablename, uint32_t tableLen, uint32_t nodeTotal) { AbortAssert(tablename != NULL); m_MemSize = m_HeadSize + tableLen*sizeof(Array) + nodeTotal*sizeof(Entry); m_MemAddr = (uint8_t*)MemFile::Realloc(tablename, m_MemSize); AbortAssert(m_MemAddr != NULL); m_HeadAddr = (TableHead*)(m_MemAddr); m_ArrayAddr = (Array*)(m_MemAddr + m_HeadSize); m_EntryAddr = (Entry*)(m_MemAddr + m_HeadSize + tableLen*sizeof(Array)); m_HeadAddr->m_TableLen = tableLen; m_HeadAddr->m_NodeTotal = nodeTotal; strncpy(m_HeadAddr->m_TableName, tablename, sizeof(m_HeadAddr->m_TableName)); if(m_HeadAddr->m_UsedCount == 0)//if first use init array to invalid id { for(uint32_t i = 0; i < tableLen; ++i) { (m_ArrayAddr+i)->m_Head = m_InvalidId; (m_ArrayAddr+i)->m_Tail = m_InvalidId; } m_HeadAddr->m_FreeBase = 0; m_HeadAddr->m_RecycleHead = 0; } } template<typename K, typename V> HashTable<K, V>::~HashTable() { MemFile::Release(m_MemAddr, m_MemSize); } template<typename K, typename V> bool HashTable<K, V>::MoveToHead(K &key) { uint32_t nodeId = GetIdByKey(key); uint32_t index = key.HashCode() % m_HeadAddr->m_TableLen; return MoveNodeToHead(index, nodeId); } template<typename K, typename V> uint32_t HashTable<K, V>::GetIdByKey(K &key) { uint32_t hashCode = key.HashCode(); uint32_t index = hashCode % m_HeadAddr->m_TableLen; Array *tmpArray = m_ArrayAddr + index; uint32_t nodeId = tmpArray->m_Head; while(nodeId != m_InvalidId) { Entry *tmpNode = m_EntryAddr + nodeId; if(tmpNode->m_Code == hashCode && key.Equals(tmpNode->m_Key)) break; nodeId = tmpNode->m_Next; } return nodeId; } template<typename K, typename V> void HashTable<K, V>::AddNodeToHead(uint32_t index, uint32_t nodeId) { if(index >= m_HeadAddr->m_TableLen || nodeId >= m_HeadAddr->m_NodeTotal) return; Array *tmpArray = m_ArrayAddr + index; Entry *tmpNode = m_EntryAddr + nodeId; if(m_InvalidId == tmpArray->m_Head) { tmpArray->m_Head = nodeId; tmpArray->m_Tail = nodeId; } else { tmpNode->m_Next = tmpArray->m_Head; (m_EntryAddr + tmpArray->m_Head)->m_Prev = nodeId; tmpArray->m_Head = nodeId; } } template<typename K, typename V> bool HashTable<K, V>::MoveNodeToHead(uint32_t index, uint32_t nodeId) { if(index >= m_HeadAddr->m_TableLen || nodeId >= m_HeadAddr->m_NodeTotal) return false; Array *tmpArray = m_ArrayAddr + index; Entry *tmpNode = m_EntryAddr + nodeId; //already head if(tmpArray->m_Head == nodeId) { return true; } uint32_t nodePrev = tmpNode->m_Prev; uint32_t nodeNext = tmpNode->m_Next; (m_EntryAddr+nodePrev)->m_Next = nodeNext; if(nodeNext != m_InvalidId) { (m_EntryAddr+nodeNext)->m_Prev = nodePrev; } else { tmpArray->m_Tail = nodePrev; } tmpNode->m_Prev = m_InvalidId; tmpNode->m_Next = tmpArray->m_Head; (m_EntryAddr + tmpArray->m_Head)->m_Prev = nodeId; tmpArray->m_Head = nodeId; return true; } template<typename K, typename V> bool HashTable<K, V>::RecycleNode(uint32_t index, uint32_t nodeId) { if(index >= m_HeadAddr->m_TableLen || nodeId >= m_HeadAddr->m_NodeTotal) return false; Array *tmpArray = m_ArrayAddr + index; Entry *tmpNode = m_EntryAddr + nodeId; uint32_t nodePrev = tmpNode->m_Prev; uint32_t nodeNext = tmpNode->m_Next; if(nodePrev != m_InvalidId) { (m_EntryAddr + nodePrev)->m_Next = nodeNext; } else { tmpArray->m_Head = nodeNext; } if(nodeNext != m_InvalidId) { (m_EntryAddr + nodeNext)->m_Prev = nodePrev; } else { tmpArray->m_Tail = nodePrev; } (m_EntryAddr+nodeId)->m_Next = m_HeadAddr->m_RecycleHead; m_HeadAddr->m_RecycleHead = nodeId; --(m_HeadAddr->m_UsedCount); return true; } template<typename K, typename V> uint32_t HashTable<K, V>::GetTailNodeId(uint32_t index) { if(index >= m_HeadAddr->m_TableLen) return m_InvalidId; Array *tmpArray = m_ArrayAddr + index; return tmpArray->m_Tail; } template<typename K, typename V> uint32_t HashTable<K, V>::GetFreeNode() { uint32_t nodeId = m_InvalidId; if(m_HeadAddr->m_UsedCount < m_HeadAddr->m_FreeBase)//get from recycle list { nodeId = m_HeadAddr->m_RecycleHead; m_HeadAddr->m_RecycleHead = (m_EntryAddr+nodeId)->m_Next; ++(m_HeadAddr->m_UsedCount); } else if(m_HeadAddr->m_UsedCount < m_HeadAddr->m_NodeTotal)//get from free mem { nodeId = m_HeadAddr->m_FreeBase; ++(m_HeadAddr->m_FreeBase); ++(m_HeadAddr->m_UsedCount); } else { nodeId = m_InvalidId; } //init node if(nodeId < m_HeadAddr->m_NodeTotal) { Entry *tmpNode = m_EntryAddr + nodeId; memset(tmpNode, 0, sizeof(Entry)); tmpNode->m_Next = m_InvalidId; tmpNode->m_Prev = m_InvalidId; } return nodeId; }