Simple utilities sink - stuff that doesn't fit anywhere else / objects in plain C Snapshot
|
00001 #ifndef __STRTK_X_Y_H__ 00002 #define __STRTK_X_Y_H__ 00003 00004 #include <cutils/base.h> 00005 00006 #define STRTK_PATTERN_SIZE ( 256 / 8 ) 00007 00008 00009 /** 00010 * @defgroup STRTK 00011 * 00012 * @brief defines a character class. holds a bitmap of characters, that is later used for tokenization. 00013 * 00014 * Functions like strtok and strpbrk build a bitmap of characters when they are called. 00015 * They use this bitmap when these function later parse the argument character 00016 * string, this bitmap is used to check if a character is 00017 * part of the character set or not. 00018 * 00019 * The class STRTK holds the bitmap holding the character set, so that creation 00020 * of this bitmap can be done in separate step from usage, therefore parsing 00021 * function will not create character bitmap over and over again. 00022 * 00023 * I don't know if this is very usefull, a character class bitmap takes 32 bytes, which is two cache lines. 00024 * So two cache lines have to be moved in order to use this stuff. 00025 * 00026 * @{ 00027 */ 00028 00029 typedef struct tagSTRTK { 00030 uint8_t pattern[ STRTK_PATTERN_SIZE ]; 00031 00032 } STRTK; 00033 00034 00035 /** 00036 * @brief initalise a character class 00037 */ 00038 void STRTK_init( STRTK *tok , const char *stop_chars ); 00039 00040 M_INLINE void STRTK_add_char( STRTK *tok, uint8_t ch ) 00041 { 00042 tok->pattern[ ch >> 3 ] |= (1 << (ch & 7)); 00043 } 00044 00045 /** 00046 * @brief macro checks if character is part of character class 00047 */ 00048 #define STRTK_IS_CHAR( tok, ch ) \ 00049 ( (tok)->pattern[ (ch) >> 3 ] && (1 << ((ch) & 7)) ) 00050 00051 /** 00052 * @brief Returns token where all characters belong to argument character class. 00053 * @returns the pointer to the next character that is not part of the character class. 00054 */ 00055 M_INLINE char * STRTK_spn(STRTK *tok, const char *hay) 00056 { 00057 uint8_t *cpos; 00058 00059 for( cpos = (uint8_t *) hay; *cpos != '\0'; ++cpos ) { 00060 if (!STRTK_IS_CHAR( tok, *cpos )) { 00061 break; 00062 } 00063 } 00064 return (char *) cpos; 00065 } 00066 00067 /** 00068 * @brief Returns token where all characters belong to argument character class, argument string is limited. 00069 * @returns the pointer to the next character that is not part of the character class. 00070 */ 00071 M_INLINE char * STRTK_nspn(STRTK *tok, const char *hay, size_t nsize ) 00072 { 00073 uint8_t *cpos; 00074 00075 for( cpos = (uint8_t *) hay; *cpos != '\0' && nsize > 0 ; ++cpos, --nsize ) { 00076 if (!STRTK_IS_CHAR( tok, *cpos )) { 00077 break; 00078 } 00079 } 00080 return (char *) cpos; 00081 } 00082 00083 00084 /** 00085 * @brief tokenize a string . the character class is interpreted as set of white spaces. 00086 * @return pointer to the start of token, end_tok returns position right after the token. 00087 */ 00088 M_INLINE char * STRTK_ntok(STRTK *tok, const char *hay, size_t nsize, char **end_tok ) 00089 { 00090 uint8_t *cpos; 00091 char *start_tok; 00092 00093 // skip leading whitespaces. 00094 for( cpos = (uint8_t *) hay; *cpos != '\0' && nsize > 0 && STRTK_IS_CHAR( tok, *cpos ) ; ++cpos, --nsize ); 00095 00096 start_tok = (char *) cpos; 00097 00098 for( ; *cpos != '\0' && nsize > 0 && !STRTK_IS_CHAR( tok, *cpos ) ; ++cpos, --nsize ); 00099 00100 *end_tok = (char *) cpos; 00101 00102 if (start_tok == (char *) cpos) { 00103 return 0; 00104 } 00105 return start_tok; 00106 } 00107 00108 /** 00109 * @brief tokenize a string . the character class is interpreted as set of white spaces. 00110 * @return pointer to the start of token, end_tok returns position right after the token. 00111 */ 00112 M_INLINE char * STRTK_tok(STRTK *tok, const char *hay, char **end_tok ) 00113 { 00114 uint8_t *cpos; 00115 char *start_tok; 00116 00117 // skip leading whitespaces. 00118 for( cpos = (uint8_t *) hay; *cpos != '\0' && STRTK_IS_CHAR( tok, *cpos ) ; ++cpos); 00119 00120 start_tok = (char *) cpos; 00121 00122 for( ; *cpos != '\0' && !STRTK_IS_CHAR( tok, *cpos ) ; ++cpos); 00123 00124 *end_tok = (char *) cpos; 00125 00126 if (start_tok == (char *) cpos) { 00127 return 0; 00128 } 00129 return start_tok; 00130 } 00131 00132 /** 00133 * @} 00134 */ 00135 00136 00137 #endif 00138