本文共 7256 字,大约阅读时间需要 24 分钟。
#include < hash.h > /* Some useful hash function. It's not a particularly good hash function (<< 5 would be better than << 4), but people believe in it because it comes from Dragon book. */ unsigned int hashpjw ( const unsigned char * x, unsigned int len) // From Dragon book, p436 { unsigned int h = 0 ; unsigned int g; for (; len > 0 ; len -- ) { h = (h << 4 ) + * x ++ ; if ((g = h & 0xf0000000 ) != 0 ) h = (h ^ (g >> 24 )) ^ g; } return h;} 水煮coreutils 之三 完美hash
什么叫做完美的hash?
先可以参考下这篇文章
下面是我的理解,一次性计算出输入字符串的位置,没有重复值.复杂的说就是够造一个有限自动机了.
gperf 可以帮助我们做这件事情,第一是用这个比较简单,第二呢就是这个比较酷!
程序员总喜欢弄些奇奇怪怪的东西在代码里显示自己的卓尔不凡,虽然这个被现代软件工程所鄙视,但是自由软件要的就是
自由!在什么地方使用完美hash?命令行解析,前面的链接已经详细说明了,个人感觉在速度要求高,输入集合确定,查询效率要求高的地方使用完美hash比较合适,毕竟天下没有白吃的午餐,总是空间换速度!
用文字表达思想总是那么的不擅长,还是写个例子先,一切尽在不言中啊!
% { /* C code that goes verbatim in output */ #include < stdio.h > #include < stdlib.h > #include < string .h > % } struct tl{ const char * name ; const char * s2;}; %% 881050971 , " aaaaa " 881123701 , " bbbbb " 881122057 , " ccccc " 881044216 , " ddddd " 881052006 , " eeeee " 881160639 , " fffff " 881120640 , " ggggg " %% int main( int argc, char ** argv){ char str[ 32 ]; const struct tl * str2; while (scanf( " %s " ,str)){ str2 = in_word_set(str,strlen(str));printf( " %s " ,str2 -> s2);} return 0 ;} 运行命令:
gperf - t - L C tlh.gperf > tl.c
那来see see tl.c
/* C code produced by gperf version 3.0.3 */ /* Command-line: gperf -t -L C tlh.gperf */ /* Computed positions: -k'4,8' */ #if !((' ' == 32) && ('!' == 33) && ('"' == 34) && ('#' == 35) && ( ' % ' == 37 ) && ( ' & ' == 38 ) && ( ' ' ' == 39 ) && ( ' ( ' == 40 ) && ( ' ) ' == 41 ) && ( ' * ' == 42 ) && ( ' + ' == 43 ) && ( ' , ' == 44 ) && ( ' - ' == 45 ) && ( ' . ' == 46 ) && ( ' / ' == 47 ) && ( ' 0 ' == 48 ) && ( ' 1 ' == 49 ) && ( ' 2 ' == 50 ) && ( ' 3 ' == 51 ) && ( ' 4 ' == 52 ) && ( ' 5 ' == 53 ) && ( ' 6 ' == 54 ) && ( ' 7 ' == 55 ) && ( ' 8 ' == 56 ) && ( ' 9 ' == 57 ) && ( ' : ' == 58 ) && ( ' ; ' == 59 ) && ( ' < ' == 60 ) && ( ' = ' == 61 ) && ( ' > ' == 62 ) && ( ' ? ' == 63 ) && ( ' A ' == 65 ) && ( ' B ' == 66 ) && ( ' C ' == 67 ) && ( ' D ' == 68 ) && ( ' E ' == 69 ) && ( ' F ' == 70 ) && ( ' G ' == 71 ) && ( ' H ' == 72 ) && ( ' I ' == 73 ) && ( ' J ' == 74 ) && ( ' K ' == 75 ) && ( ' L ' == 76 ) && ( ' M ' == 77 ) && ( ' N ' == 78 ) && ( ' O ' == 79 ) && ( ' P ' == 80 ) && ( ' Q ' == 81 ) && ( ' R ' == 82 ) && ( ' S ' == 83 ) && ( ' T ' == 84 ) && ( ' U ' == 85 ) && ( ' V ' == 86 ) && ( ' W ' == 87 ) && ( ' X ' == 88 ) && ( ' Y ' == 89 ) && ( ' Z ' == 90 ) && ( ' [ ' == 91 ) && ( ' / ' == 92 ) && ( ' ] ' == 93 ) && ( ' ^ ' == 94 ) && ( ' _ ' == 95 ) && ( ' a ' == 97 ) && ( ' b ' == 98 ) && ( ' c ' == 99 ) && ( ' d ' == 100 ) && ( ' e ' == 101 ) && ( ' f ' == 102 ) && ( ' g ' == 103 ) && ( ' h ' == 104 ) && ( ' i ' == 105 ) && ( ' j ' == 106 ) && ( ' k ' == 107 ) && ( ' l ' == 108 ) && ( ' m ' == 109 ) && ( ' n ' == 110 ) && ( ' o ' == 111 ) && ( ' p ' == 112 ) && ( ' q ' == 113 ) && ( ' r ' == 114 ) && ( ' s ' == 115 ) && ( ' t ' == 116 ) && ( ' u ' == 117 ) && ( ' v ' == 118 ) && ( ' w ' == 119 ) && ( ' x ' == 120 ) && ( ' y ' == 121 ) && ( ' z ' == 122 ) && ( ' { ' == 123 ) && ( ' | ' == 124 ) && ( ' } ' == 125 ) && ( ' ~ ' == 126 )) /* The character set is not based on ISO-646. */ error " gperf generated tables don't work with this execution character set. Please report a bug to <bug-gnu-gperf@gnu.org>. " #endif #line 1 "tlh.gperf" /* C code that goes verbatim in output */ #include < stdio.h > #include < stdlib.h > #include < string .h > #line 8 "tlh.gperf" struct tl{ const char * name ; const char * s2;}; #define TOTAL_KEYWORDS 7 #define MIN_WORD_LENGTH 9 #define MAX_WORD_LENGTH 9 #define MIN_HASH_VALUE 9 #define MAX_HASH_VALUE 21 /* maximum key range = 13, duplicates = 0 */ #ifdef __GNUC____inline #else #ifdef __cplusplusinline #endif #endif static unsigned int hash (str, len) register const char * str; register unsigned int len;{ static unsigned char asso_values[] = { 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 0 , 5 , 2 , 22 , 4 , 7 , 2 , 22 , 0 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 , 22 }; return len + asso_values[(unsigned char )str[ 7 ] + 1 ] + asso_values[(unsigned char )str[ 3 ]];}#ifdef __GNUC____inline#ifdef __GNUC_STDC_INLINE____attribute__ ((__gnu_inline__)) #endif #endif struct tl * in_word_set (str, len) register const char * str; register unsigned int len;{ static struct tl wordlist[] = { { "" }, { "" }, { "" }, { "" }, { "" }, { "" }, { "" }, { "" }, { "" }, #line 10 "tlh.gperf" { " 881050971 " , " aaaaa " }, { "" }, #line 13 "tlh.gperf" { " 881044216 " , " ddddd " }, { "" }, { "" }, #line 14 "tlh.gperf" { " 881052006 " , " eeeee " }, { "" }, #line 12 "tlh.gperf" { " 881122057 " , " ccccc " }, { "" }, #line 15 "tlh.gperf" { " 881160639 " , " fffff " }, #line 11 "tlh.gperf" { " 881123701 " , " bbbbb " }, { "" }, #line 16 "tlh.gperf" { " 881120640 " , " ggggg " } }; if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH) { register int key = hash (str, len); if (key <= MAX_HASH_VALUE && key >= 0 ) { register const char * s = wordlist[key].name; if ( * str == * s && ! strcmp (str + 1 , s + 1 )) return & wordlist[key]; } } return 0 ;} #line 17 "tlh.gperf" int main( int argc, char ** argv){ char str[ 32 ]; const struct tl * str2; while (scanf( " %s " ,str)){ str2 = in_word_set(str,strlen(str));printf( " %s " ,str2 -> s2);} return 0 ;} 生成代码没有太多出彩的地方,主要就是构建了asso_values 这个静态结构,以什么原则生成这个结构就要看gperf的代码了.
我们注意到,gperf 自动生成了两个函数hash 和 in_word_set ,hash 函数返回一个int值 对应每个key 返回值唯一,
in_word_set 返回key 对应的结构指针.
看gperf的代码的时候注意到它使用的一个hash函数
#include < hash.h > /* Some useful hash function. It's not a particularly good hash function (<< 5 would be better than << 4), but people believe in it because it comes from Dragon book. */ unsigned int hashpjw ( const unsigned char * x, unsigned int len) // From Dragon book, p436 { unsigned int h = 0 ; unsigned int g; for (; len > 0 ; len -- ) { h = (h << 4 ) + * x ++ ; if ((g = h & 0xf0000000 ) != 0 ) h = (h ^ (g >> 24 )) ^ g; } return h;}
有意思的是函数头的注释,不知道 Dragon book 是什么,在google上也没有看出什么来好像很有名的样子.
不行!以后要多写字 少贴些代码.
转载地址:http://pskmi.baihongyu.com/