当前位置:   article > 正文

yara 源码学习(二) 规则编译部分_yara匹配规则预编译

yara匹配规则预编译

yara规则的详细信息请参考:

https://yara.readthedocs.io/en/stable/writingrules.html

根据官方文档,yara规则长这个样子:

[1]:yara rule

  1. /*
  2. This is a multi-line comment ...
  3. */
  4. rule silent_banker : banker
  5. {
  6. meta:
  7. description = "This is just an example"
  8. threat_level = 3
  9. in_the_wild = true
  10. strings:
  11. $a = {6A 40 68 00 30 00 00 6A 14 8D 91}
  12. $b = {8D 4D B0 2B C1 83 C0 27 99 6A 4E 59 F7 F9}
  13. $c = "UVODFRYSIHLNWPEJXQZAKCBGMT"
  14. condition:
  15. $a or $b or $c
  16. }

1. /* 。。。。*/这部分是注释,可有可无。相关解释见链接

2.rule 这个是yara的关键词,是一条规则必不可少的部分,其实 rule前边还可以有 global,private 等关键词

3.silent_banker 这个是规则ID( rule identifier ),是用户自定义的部分,一般是用来表示规则的名称。

4.banker   这个是规则标签(Rule tags),主要用于过滤扫描结果。

5.meta   此部分是元数据( Metadata),主要是规则的描述信息,比如作者,日期或者其他信息 

6.strings  此部分是规则字符串(strings),  就是描述样本特征的字符串,可以使用普通字符串,16进制字符串,和正则表达式。

7.condition 此部分是规则的条件部分(condition),主要用来表述怎么组合利用上边的样本特征(strings)及其他的一些条件。

其中,规则中最复杂、也最丰富的是第6、7两部分(strings 和 condition)。编写一条规则,最主要的就是编写这两部分。

注:文章开始处的链接文档是最新版的yara的文档,而此文解析的源码是早期的源码,文档中的一部分特性在此源码中是没有的。

比如xor strings ,base64 strings.

规则编译,即是将[1]处的这样一条条规则转化成内存中的数据结构 YARA_CONTEXT* context;

此部分主要通过lex.l   grammar.y   ast.h,ast.c 这几个文件完成。

lex.l 是规则的词法分析部分。通过flex程序可编译生成 lex.h 和 lex.c 文件。

grammar.y 是规则的语法分析部分。通过bison程序编译产生 grammar.h 和 grammar.c 文件。

ast.h/ast.c 主要是生成新的rule,string 等内存结构及查找。

 yara程序在规则编译(即对规则文本进行扫描、解析)部分的入口是

parse_rules_string

parse_rules_file

这两个函数。

  1. int parse_rules_string(const char* rules_string, YARA_CONTEXT* context)
  2. {
  3. yyscan_t yyscanner;
  4. YY_BUFFER_STATE state;
  5. yylex_init(&yyscanner);
  6. yyset_extra(context, yyscanner);
  7. state = yy_scan_string(rules_string, yyscanner);
  8. yyset_lineno(1, yyscanner);
  9. yyparse(yyscanner);
  10. yylex_destroy(yyscanner);
  11. return context->errors;
  12. }
  13. int parse_rules_file(FILE* rules_file, YARA_CONTEXT* context)
  14. {
  15. yyscan_t yyscanner;
  16. yylex_init(&yyscanner);
  17. #ifdef DEBUG
  18. yyset_debug(1, yyscanner);
  19. #endif
  20. yyset_in(rules_file, yyscanner);
  21. yyset_extra(context, yyscanner);
  22. yyparse(yyscanner); //这个是语法(grammar)分析器的入口 yylex是词法分析器的入口
  23. yylex_destroy(yyscanner);
  24. return context->errors;
  25. }

BNF与ABNF 巴科斯范式

[Flex&Bison]协同工作简介

 

扫描流程:

yyparse函数(grammar.c)中调用YYLEX获取记号(token),当匹配到用户定义的BNF范式后,进入一个大大的switch,调用用户定义的各种归约函数(reduce_*):

  1. yyreduce:
  2. /* yyn is the number of a rule to reduce with. */
  3. yylen = yyr2[yyn];
  4. /* If YYLEN is nonzero, implement the default value of the action:
  5. `$$ = $1'.
  6. Otherwise, the following line sets YYVAL to garbage.
  7. This behavior is undocumented and Bison
  8. users should not rely upon it. Assigning to YYVAL
  9. unconditionally makes the parser a bit smaller, and it avoids a
  10. GCC warning that YYVAL may be used uninitialized. */
  11. yyval = yyvsp[1-yylen];
  12. YY_REDUCE_PRINT (yyn);
  13. switch (yyn)
  14. {
  15. case 6: //这里的case 与grammar.y 中的BNF范式的定义顺序基本对应
  16. #line 279 "grammar.y"
  17. {
  18. if (reduce_rule_declaration(yyscanner, (yyvsp[(3) - (9)].c_string),(yyvsp[(1) - (9)].integer),(yyvsp[(4) - (9)].tag),(yyvsp[(6) - (9)].meta),(yyvsp[(7) - (9)].string),(yyvsp[(8) - (9)].term)) != ERROR_SUCCESS)
  19. {
  20. yyerror(yyscanner, NULL);
  21. YYERROR;
  22. }
  23. }
  24. break;
  25. .........................

之后的代码没有什么太难理解的了。

比较有意思的就是ast.c中的new_hex_string函数。

此函数中完整实现了对 ? 通配符 ,[num-num] ,(BYTE|BYTE) 等模式的匹配和处理。

这三种模式的相关信息保存在mask字段中。

 

此部分的解析到此结束,更多信息请参考附录中的源码。

附录:

lex.l   此部分加有注释,格式可能已破坏

  1. /*
  2. Copyright (c) 2007. Victor M. Alvarez [plusvic@gmail.com].
  3. All rights reserved.
  4. Redistribution and use in source and binary forms, with or without
  5. modification, are permitted provided that the following conditions
  6. are met:
  7. 1. Redistributions of source code must retain the above copyright
  8. notice, this list of conditions and the following disclaimer.
  9. 2. Redistributions in binary form must reproduce the above copyright
  10. notice, this list of conditions and the following disclaimer in the
  11. documentation and/or other materials provided with the distribution.
  12. 3. All advertising materials mentioning features or use of this software
  13. must display the following acknowledgement:
  14. This product includes software developed by Victor M. Alvarez and its
  15. contributors.
  16. 4. Neither the name of Victor M. Alvarez nor the names of its contributors
  17. may be used to endorse or promote products derived from this software
  18. without specific prior written permission.
  19. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  20. AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  21. IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  22. ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  23. LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  24. CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  25. SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  26. INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  27. CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  28. ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  29. POSSIBILITY OF SUCH DAMAGE.
  30. */
  31. /* Lexical analyzer for YARA */
  32. %{
  33. #include <math.h>
  34. #include <stdio.h>
  35. #include <string.h>
  36. #include "grammar.h"
  37. #include "xtoi.h"
  38. #include "mem.h"
  39. #include "sizedstr.h"
  40. #include "lex.h"
  41. #include "yara.h"
  42. #define LEX_CHECK_SPACE_OK(data, current_size, max_length) \
  43. if (strlen(data) + current_size >= max_length - 1) \
  44. { \
  45. yyerror(yyscanner, "out of space in lex_buf"); \
  46. yyterminate(); \
  47. }
  48. #define YYTEXT_TO_BUFFER \
  49. { \
  50. char *yptr = yytext; \
  51. LEX_CHECK_SPACE_OK(yptr, yyextra->lex_buf_len, LEX_BUF_SIZE); \
  52. while ( *yptr ) \
  53. { \
  54. *yyextra->lex_buf_ptr++ = *yptr++; \
  55. yyextra->lex_buf_len++; \
  56. } \
  57. }
  58. #ifdef WIN32
  59. #define snprintf _snprintf
  60. #endif
  61. %}
  62. /*flex 的配置信息 reentrant 可重入 bison-bridge配合bison使用 */
  63. %option reentrant bison-bridge
  64. /**/
  65. %option noyywrap
  66. %option nounistd
  67. %option yylineno
  68. %option verbose
  69. %option warn
  70. /*http://postgresqlchina.com/tecdocdetail/1 */
  71. /*%x 定义 开始状态,开始状态代表进入一个特定的状态,在规则段只有定义了特定状态的规则才会匹配,*/
  72. /*这种规则通过<start stat>来标识。例如 定义段定义了 %x xb 则在规则段只有<xb>开头的规则才会匹配,其他的的规则则不会被匹配。*/
  73. %x str
  74. %x regexp
  75. %x include
  76. %x comment
  77. digit [0-9]
  78. letter [a-zA-Z]
  79. hexdigit [a-fA-F0-9]
  80. %%
  81. /*关键字识别 */
  82. "<" { return _LT_; }
  83. ">" { return _GT_; }
  84. "<=" { return _LE_; }
  85. ">=" { return _GE_; }
  86. "==" { return _EQ_; }
  87. "!=" { return _NEQ_; }
  88. "<<" { return _SHIFT_LEFT_; }
  89. ">>" { return _SHIFT_RIGHT_; }
  90. "private" { return _PRIVATE_; }
  91. "global" { return _GLOBAL_; }
  92. "rule" { return _RULE_; }
  93. "meta" { return _META_; }
  94. "strings" { return _STRINGS_; }
  95. "ascii" { return _ASCII_; }
  96. "wide" { return _WIDE_; }
  97. "fullword" { return _FULLWORD_; }
  98. "nocase" { return _NOCASE_; }
  99. "condition" { return _CONDITION_; }
  100. "true" { return _TRUE_; }
  101. "false" { return _FALSE_; }
  102. "not" { return _NOT_; }
  103. "and" { return _AND_; }
  104. "or" { return _OR_; }
  105. "at" { return _AT_; }
  106. "in" { return _IN_; }
  107. "of" { return _OF_; }
  108. "them" { return _THEM_; }
  109. "for" { return _FOR_; }
  110. "all" { return _ALL_; }
  111. "any" { return _ANY_; }
  112. "entrypoint" { return _ENTRYPOINT_; }
  113. "filesize" { return _SIZE_; }
  114. "rva" { return _RVA_; }
  115. "offset" { return _OFFSET_; }
  116. "file" { return _FILE_; }
  117. "section" { return _SECTION_; }
  118. "uint8" { return _UINT8_; }
  119. "uint16" { return _UINT16_; }
  120. "uint32" { return _UINT32_; }
  121. "int8" { return _INT8_; }
  122. "int16" { return _INT16_; }
  123. "int32" { return _INT32_; }
  124. "matches" { return _MATCHES_; }
  125. "contains" { return _CONTAINS_; }
  126. "index" { return _INDEX_; }
  127. /*多行注释识别 */
  128. "/*" { BEGIN(comment); }
  129. <comment>"*/" { BEGIN(INITIAL); }
  130. <comment>(.|\n) { /* skip comments */ }
  131. /*单行注释识别 */
  132. "//"[^\n]* { /* skip single-line comments */ }
  133. include[ \t]+\" {
  134. yyextra->lex_buf_ptr = yyextra->lex_buf;
  135. yyextra->lex_buf_len = 0;
  136. BEGIN(include);
  137. }
  138. <include>[^\"]+ {
  139. YYTEXT_TO_BUFFER;
  140. }
  141. <include>\" {
  142. char buffer[1024];
  143. char *current_file_name;
  144. char *s = NULL;
  145. char *b = NULL;
  146. char *f;
  147. FILE* fh;
  148. YARA_CONTEXT* context = yyget_extra(yyscanner);
  149. if (context->allow_includes)
  150. {
  151. *yyextra->lex_buf_ptr = '\0'; // null-terminate included file path
  152. // move path of current source file into buffer
  153. current_file_name = yr_get_current_file_name(context);
  154. if (current_file_name != NULL)
  155. {
  156. strncpy(buffer, yr_get_current_file_name(context), sizeof(buffer)-1);
  157. buffer[sizeof(buffer)-1] = '\0';
  158. }
  159. else
  160. {
  161. buffer[0] = '\0';
  162. }
  163. // make included file path relative to current source file
  164. s = strrchr(buffer, '/');
  165. #ifdef WIN32
  166. b = strrchr(buffer, '\\'); // in Windows both path delimiters are accepted
  167. #endif
  168. if (s != NULL || b != NULL)
  169. {
  170. f = (b > s)? (b + 1): (s + 1);
  171. strncpy(f, yyextra->lex_buf, sizeof(buffer) - (f - buffer));
  172. buffer[sizeof(buffer)-1] = '\0';
  173. // SECURITY: Potential for directory traversal here.
  174. fh = fopen(buffer, "r");
  175. // if include file was not found relative to current source file, try to open it
  176. // with path as specified by user (maybe user wrote a full path)
  177. if (fh == NULL)
  178. {
  179. // SECURITY: Potential for directory traversal here.
  180. fh = fopen(yyextra->lex_buf, "r");
  181. }
  182. }
  183. else
  184. {
  185. // SECURITY: Potential for directory traversal here.
  186. fh = fopen(yyextra->lex_buf, "r");
  187. }
  188. if (fh != NULL)
  189. {
  190. int error_code = ERROR_SUCCESS;
  191. if ((error_code = yr_push_file_name(context, yyextra->lex_buf)) != ERROR_SUCCESS)
  192. {
  193. if (error_code == ERROR_INCLUDES_CIRCULAR_REFERENCE)
  194. {
  195. yyerror(yyscanner, "includes circular reference");
  196. }
  197. else if (error_code == ERROR_INCLUDE_DEPTH_EXCEEDED)
  198. {
  199. yyerror(yyscanner, "includes circular reference");
  200. }
  201. yyterminate();
  202. }
  203. yr_push_file(context, fh);
  204. yypush_buffer_state(yy_create_buffer(fh, YY_BUF_SIZE, yyscanner), yyscanner);
  205. }
  206. else
  207. {
  208. snprintf(buffer, sizeof(buffer), "can't open include file: %s", yyextra->lex_buf);
  209. yyerror(yyscanner, buffer);
  210. }
  211. }
  212. else // not allowing includes
  213. {
  214. yyerror(yyscanner, "includes are disabled");
  215. yyterminate();
  216. }
  217. BEGIN(INITIAL);
  218. }
  219. <<EOF>> {
  220. YARA_CONTEXT* context = yyget_extra(yyscanner);
  221. FILE* file = yr_pop_file(context);
  222. if (file != NULL)
  223. {
  224. fclose(file);
  225. }
  226. yr_pop_file_name(context);
  227. yypop_buffer_state(yyscanner);
  228. if (!YY_CURRENT_BUFFER)
  229. {
  230. yyterminate();
  231. }
  232. }
  233. /*string 变量名识别 现在好像没有了 */
  234. $({letter}|{digit}|_)*"*" {
  235. yylval->c_string = (char*) yr_strdup(yytext);
  236. return _STRING_IDENTIFIER_WITH_WILDCARD_;
  237. }
  238. /*string 变量名识别 */
  239. $({letter}|{digit}|_)* {
  240. yylval->c_string = (char*) yr_strdup(yytext);
  241. return _STRING_IDENTIFIER_;
  242. }
  243. /*条件部分的 变量名识别 */
  244. #({letter}|{digit}|_)* {
  245. yylval->c_string = (char*) yr_strdup(yytext);
  246. yylval->c_string[0] = '$'; /* replace # by $*/
  247. return _STRING_COUNT_;
  248. }
  249. /*条件部分的 变量名识别 */
  250. @({letter}|{digit}|_)* {
  251. yylval->c_string = (char*) yr_strdup(yytext);
  252. yylval->c_string[0] = '$'; /* replace @ by $*/
  253. return _STRING_OFFSET_;
  254. }
  255. /*ID 识别 */
  256. ({letter}|_)({letter}|{digit}|_)* {
  257. if (strlen(yytext) > 128)
  258. {
  259. yyerror(yyscanner, "indentifier too long");
  260. }
  261. yylval->c_string = (char*) yr_strdup(yytext);
  262. return _IDENTIFIER_;
  263. }
  264. {digit}+(MB|KB){0,1} {
  265. yylval->integer = (size_t) atol(yytext);
  266. if (strstr(yytext, "KB") != NULL)
  267. {
  268. yylval->integer *= 1024;
  269. }
  270. else if (strstr(yytext, "MB") != NULL)
  271. {
  272. yylval->integer *= 1048576;
  273. }
  274. return _NUMBER_;
  275. }
  276. 0x{hexdigit}+ {
声明:本文内容由网友自发贡献,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:【wpsshop博客】
推荐阅读
相关标签
  

闽ICP备14008679号