HTTP Parser and message builder / objects in plain C Snapshot
Classes | Typedefs | Functions
uri.c File Reference
#include "uri.h"
#include "charclass.h"
#include <string.h>
#include <arpa/inet.h>
#include "sutils.h"

Go to the source code of this file.

Classes

struct  tagURIPARSECTX

Typedefs

typedef struct tagURIPARSECTX URIPARSECTX

Functions

M_INLINE int is_mark (int8_t ch)
M_INLINE int is_unreserved (int8_t ch)
M_INLINE int is_reserved (int8_t ch)
char * ctx_copy_string_raw (URIPARSECTX *ctx, char *start, char *end)
void ctx_add_escaped_char (URIPARSECTX *ctx, char ch, int char_encoded)
char * ctx_finish_escaped_string (URIPARSECTX *ctx)
void ctx_undo_escaped_string (URIPARSECTX *ctx)
M_INLINE int parse_escaped (URIPARSECTX *ctx, char *ptr, char **next)
M_INLINE int parse_uric (URIPARSECTX *ctx, char *ptr, char **next)
M_INLINE int parse_uric_sequence (URIPARSECTX *ctx, char *ptr, char **next)
M_INLINE int parse_pchar (URIPARSECTX *ctx, char *ptr, char **next)
M_INLINE int parse_pchar_sequence (URIPARSECTX *ctx, char *ptr, char **next)
M_INLINE int parse_segment (URIPARSECTX *ctx, char *ptr, char **next)
M_INLINE int parse_path_segments (URIPARSECTX *ctx, char *ptr, char **next)
M_INLINE int parse_ipv4_address (URIPARSECTX *ctx, char *ptr, char **next)
M_INLINE int parse_userinfo (URIPARSECTX *ctx, char *ptr, char **next)
M_INLINE int parse_domainlabel (char *ptr, char **next)
M_INLINE int parse_hostname (URIPARSECTX *ctx, char *ptr, char **next)
M_INLINE int parse_ipv6_address (URIPARSECTX *ctx, char *ptr, char **next)
M_INLINE int parse_hostport (URIPARSECTX *ctx, char *ptr, char **next)
M_INLINE int parse_server (URIPARSECTX *ctx, char *ptr, char **next)
M_INLINE int parse_authority (URIPARSECTX *ctx, char *ptr, char **next)
M_INLINE int parse_scheme (URIPARSECTX *ctx, char *line, char **next)
M_INLINE int parse_abs_path (URIPARSECTX *ctx, char *ptr, char **next)
M_INLINE int parse_net_path (URIPARSECTX *ctx, char *ptr, char **next)
M_INLINE int parse_uric_no_slash (URIPARSECTX *ctx, char *ptr, char **next)
int parse_opaque_part (URIPARSECTX *ctx, char *ptr, char **next)
int parse_hier_part (URIPARSECTX *ctx, char *ptr, char **next, int parse_opaque)
int URI_parse (URI *url, char *line)

Typedef Documentation

typedef struct tagURIPARSECTX URIPARSECTX

Function Documentation

void ctx_add_escaped_char ( URIPARSECTX *  ctx,
char  ch,
int  char_encoded 
)

Definition at line 46 of file uri.c.

{
  /*
   * Append one character to the decoded-string buffer and store, at the
   * matching offset of the cdata_is_escaped_start array, whether the
   * character was produced by a %XX escape (char_encoded != 0).
   *
   * NOTE(review): M_UNUSED is applied to char_encoded although the value
   * is used two lines below - presumably a leftover; confirm and remove.
   */
  M_UNUSED( char_encoded );

  ctx->cdata_pos[ 0 ] = ch;
  ctx->cdata_is_escaped_start[ ctx->cdata_pos - ctx->cdata_pos_off ] = (char) char_encoded;
  ctx->cdata_pos += 1;
}
char* ctx_copy_string_raw ( URIPARSECTX *  ctx,
char *  start,
char *  end 
)

Definition at line 35 of file uri.c.

{
  /*
   * Copy the character range [start, end) into the raw-string buffer,
   * terminate it with '\0' and move the write cursor just past the
   * terminator. Returns a pointer to the beginning of the copy.
   */
  char *copy = ctx->cdata_raw_pos;
  char *tail = copy;

  strncpy( copy, start, end - start );
  tail += end - start;
  *tail = '\0';

  // the next raw string starts right after the terminator
  ctx->cdata_raw_pos = tail + 1;

  return copy;
}
char* ctx_finish_escaped_string ( URIPARSECTX *  ctx)

Definition at line 58 of file uri.c.

{
  /*
   * Terminate the decoded string that is currently being accumulated and
   * return a pointer to its first character. The position after the
   * terminator becomes the start of the next decoded string.
   */
  char *finished = ctx->cdata_pos_start;

  *ctx->cdata_pos = '\0';
  ctx->cdata_pos += 1;
  ctx->cdata_pos_start = ctx->cdata_pos;

  return finished;
}
void ctx_undo_escaped_string ( URIPARSECTX *  ctx)

Definition at line 68 of file uri.c.

{
  /*
   * Throw away every character added since the last finished decoded
   * string by moving the write cursor back to where that string began.
   */
  char *rewind_to = ctx->cdata_pos_start;

  ctx->cdata_pos = rewind_to;
}
M_INLINE int is_mark ( int8_t  ch)

Definition at line 10 of file uri.c.

                                {
   /* Returns 1 if ch is one of the punctuation characters  - _ . ! ~ * ' ( )  and 0 otherwise. */
   switch (ch) {
   case '-':
   case '_':
   case '.':
   case '!':
   case '~':
   case '*':
   case '\'':
   case '(':
   case ')':
     return 1;
   default:
     return 0;
   }
}
M_INLINE int is_reserved ( int8_t  ch)

Definition at line 20 of file uri.c.

                                     {
    /* Returns 1 if ch is one of the characters  ; / ? : @ & = + $ ,  and 0 otherwise. */
    switch (ch) {
    case ';':
    case '/':
    case '?':
    case ':':
    case '@':
    case '&':
    case '=':
    case '+':
    case '$':
    case ',':
      return 1;
    default:
      return 0;
    }
}
M_INLINE int is_unreserved ( int8_t  ch)

Definition at line 15 of file uri.c.

                                       {
    /* Returns 1 if ch is alphanumeric (is_alphanum) or a mark character (is_mark), 0 otherwise. */
    if (is_alphanum(ch)) {
      return 1;
    }
    return is_mark(ch) ? 1 : 0;
}
M_INLINE int parse_abs_path ( URIPARSECTX *  ctx,
char *  ptr,
char **  next 
)

Definition at line 407 of file uri.c.

{
    /*
     * Parse an absolute path: an optional leading '/' followed by path
     * segments. On success the raw text is stored in rep->path_raw and the
     * decoded text in rep->path, and *next points past the path.
     *
     * Returns 0 on success, -1 if the path segments fail to parse.
     */
    char *start = ptr;

    if (*ptr == '/') {
      ctx_add_escaped_char( ctx, '/', 0 );
      *next = ++ptr;
    }
    if (parse_path_segments( ctx, ptr, next ) < 0) {
      return -1;
    }
    // Compare with start, not with ptr: ptr has already been moved past the
    // leading '/'. For a bare "/" the old test saw "nothing consumed", left
    // the '/' sitting unfinished in the decoded buffer, and the next decoded
    // string (e.g. the query) came out prefixed with that '/'.
    if (*next != start) {
      ctx->rep->path_raw = ctx_copy_string_raw( ctx, start, *next );
      ctx->rep->path = ctx_finish_escaped_string( ctx );
    }
    return 0;
}
M_INLINE int parse_authority ( URIPARSECTX *  ctx,
char *  ptr,
char **  next 
)

Definition at line 377 of file uri.c.

{
  /*
   * The authority is parsed as a server component only.
   * Returns 0 if parse_server returned 0, otherwise -1.
   */
  return parse_server( ctx, ptr, next ) ? -1 : 0;
}
M_INLINE int parse_domainlabel ( char *  ptr,
char **  next 
)

Definition at line 241 of file uri.c.

{
   /*
    * Parse one domain label: it must start and end with an alphanumeric
    * character; alphanumerics and '-' are allowed in between.
    *
    * Returns 0 and sets *next to the first character after the label,
    * or -1 (leaving *next untouched) if the label is malformed.
    */
   char *cur = ptr;

   if (!is_alphanum( *cur )) {
     return -1;
   }

   // consume the rest of the label
   do {
     ++cur;
   } while (is_alphanum( *cur ) || *cur == '-');

   // a label must not end with '-'
   if (!is_alphanum( cur[ -1 ] )) {
     return -1;
   }

   *next = cur;
   return 0;
}
M_INLINE int parse_escaped ( URIPARSECTX *  ctx,
char *  ptr,
char **  next 
)

Definition at line 75 of file uri.c.

{
  /*
   * Parse a "%XX" escape sequence at ptr.
   *
   * Returns  1 if ptr does not start with '%' (nothing consumed, *next untouched),
   *         -1 if '%' is not followed by two characters accepted by is_hex_ext,
   *          0 on success; *next is then set to ptr + 3.
   *
   * The decoded character is appended to the decoded buffer, flagged as
   * escaped, unless its value lies in 0x00..0x1F; such escapes are
   * consumed but not stored.
   *
   * NOTE(review): the non-zero results of is_hex_ext are combined as
   * nibbles while zero means "not a hex digit" - confirm in charclass.h
   * how the digit '0' is represented.
   */
  int hi_nibble;
  int lo_nibble;
  int decoded;

  if (*ptr != '%') {
    return 1;
  }

  hi_nibble = is_hex_ext( ptr[ 1 ] );
  if (hi_nibble == 0) {
    return -1;
  }

  // ptr[2] is inspected only after ptr[1] has been accepted
  lo_nibble = is_hex_ext( ptr[ 2 ] );
  if (lo_nibble == 0) {
    return -1;
  }

  decoded = (hi_nibble << 4) | lo_nibble;
  if (decoded < 0 || decoded > 0x1F) {
    ctx_add_escaped_char( ctx, decoded, 1 );
  }

  *next = ptr + 3;
  return 0;
}
int parse_hier_part ( URIPARSECTX *  ctx,
char *  ptr,
char **  next,
int  parse_opaque 
)

Definition at line 472 of file uri.c.

{
  /*
   * Parse the part of a URI that follows the scheme:
   *   "//" ...  -> net path (authority plus optional absolute path)
   *   "/"  ...  -> absolute path
   *   other     -> opaque part if parse_opaque is non-zero, else an error
   * followed, for the two path forms, by an optional "?query" and an
   * optional "#fragment".
   *
   * Returns 0 on success and -1 on error; for the opaque form the result of
   * parse_opaque_part is returned unchanged. *next is set to the first
   * character that was not consumed.
   */
  char *start;
  int rc;

  if (ptr[0] != '/') {
    return parse_opaque ? parse_opaque_part( ctx, ptr, next ) : -1;
  }

  rc = (ptr[1] == '/') ? parse_net_path( ctx, ptr + 2, next )
                       : parse_abs_path( ctx, ptr, next );
  if (rc) {
    return -1;
  }

  ptr = *next;
  if (*ptr == '?') {
    start = ++ptr;
    // The sequence parser does not necessarily update *next when it consumes
    // nothing. Without this assignment an empty query left *next on the '?',
    // i.e. one before start, and the raw copy below got a length of -1.
    *next = ptr;
    if (parse_uric_sequence( ctx, ptr, next ) == -1) {
      return -1;
    }
    ptr = *next;
    ctx->rep->query_raw = ctx_copy_string_raw( ctx, start, ptr );
    ctx->rep->query = ctx_finish_escaped_string( ctx );
  }

  if (*ptr != '#') {
    return 0;
  }

  start = ++ptr;
  // same guard as for the query: an empty fragment must yield an empty string
  *next = ptr;
  if (parse_uric_sequence( ctx, ptr, next ) == -1) {
    return -1;
  }
  ptr = *next;
  ctx->rep->fragment_raw = ctx_copy_string_raw( ctx, start, ptr );
  ctx->rep->fragment = ctx_finish_escaped_string( ctx );

  return 0;
}
M_INLINE int parse_hostname ( URIPARSECTX *  ctx,
char *  ptr,
char **  next 
)

Definition at line 259 of file uri.c.

{
   /*
    * Parse a host name made of domain labels separated by '.'.
    *
    * Parsing stops successfully when a label position starts with '/'
    * (this also accepts an empty host directly in front of '/'), or after
    * a label that is not followed by '.'. In the latter case the last
    * label must not begin with a digit.
    *
    * On success the host text is stored in rep->host, the flag
    * URI_FLAGS_HOST_HOSTNAME is set, *next points past the host, and 0 is
    * returned. Returns -1 on error.
    */
   char *cur = ptr;
   char *label = ptr;
   int stopped_at_slash = 0;

   while (!stopped_at_slash) {
       label = cur;
       if (*cur == '/') {
         stopped_at_slash = 1;
         continue;
       }
       if (parse_domainlabel( cur, next ) < 0) {
         return -1;
       }
       cur = *next;
       if (*cur != '.') {
         break;
       }
       ++cur;
   }

   // check that last component is top label (skipped when we stopped at '/')
   if (!stopped_at_slash && is_digit( *label )) {
      return -1;
   }

   *next = cur;

   ctx->rep->flags |= URI_FLAGS_HOST_HOSTNAME;
   ctx->rep->host = ctx_copy_string_raw( ctx, ptr, cur );

   return 0;
}
M_INLINE int parse_hostport ( URIPARSECTX *  ctx,
char *  ptr,
char **  next 
)

Definition at line 331 of file uri.c.

{
   /*
    * Parse "host [ ':' port ]". The host is tried, in this order, as an
    * IPv4 address, a host name, and a bracketed IPv6 address.
    *
    * Returns 0 on success with *next pointing past the consumed text, and
    * -1 if no host form matches or the port is larger than 65535.
    *
    * The port is accumulated digit by digit instead of calling atoi():
    * atoi() has undefined behaviour when the value does not fit an int, and
    * it also accepts leading white space and a sign that this parser never
    * consumes. An empty port ("host:") yields 0, as before.
    */
   long port = 0;

   if ( parse_ipv4_address( ctx, ptr, next ) != 0
        && parse_hostname( ctx, ptr, next ) != 0
        && parse_ipv6_address( ctx, ptr, next ) != 0 ) {
     return -1;
   }

   ptr = *next;
   if (*ptr != ':') {
     return 0;
   }

   // assumes is_digit() accepts exactly the characters '0'..'9'
   for ( ptr = ptr + 1; is_digit( *ptr ); ++ptr ) {
     port = port * 10 + ( *ptr - '0' );
     if (port > 65535) {
       return -1;   // bail out early; this also keeps port from overflowing
     }
   }
   *next = ptr;

   ctx->rep->port = (int) port;
   return 0;
}
M_INLINE int parse_ipv4_address ( URIPARSECTX *  ctx,
char *  ptr,
char **  next 
)

Definition at line 184 of file uri.c.

                                                                             {
  /*
   * Parse a dotted quad: four groups of one or more digits separated by
   * '.'. The numeric value of the groups is not range-checked.
   *
   * On success the text is stored in rep->host, URI_FLAGS_HOST_IPv4 is
   * set, *next points past the address and 0 is returned. Returns -1
   * (without touching *next or rep) otherwise.
   */
  int groups_left = 4;
  char *cur = ptr;

  for (;;) {
    if (!is_digit( *cur )) {
      return -1;
    }
    do {
      ++cur;
    } while (is_digit( *cur ));

    if (--groups_left == 0) {
      break;
    }
    if (*cur != '.') {
      return -1;
    }
    ++cur;
  }

  *next = cur;
  ctx->rep->flags |= URI_FLAGS_HOST_IPv4;
  ctx->rep->host = ctx_copy_string_raw( ctx, ptr, cur );

  return 0;
}
M_INLINE int parse_ipv6_address ( URIPARSECTX *  ctx,
char *  ptr,
char **  next 
)

Definition at line 294 of file uri.c.

{
   /*
    * Parse a bracketed IPv6 literal: '[' address ']'. The text between the
    * brackets is validated with inet_pton(AF_INET6, ...).
    *
    * Returns  1 if ptr does not start with '[' (nothing consumed),
    *         -1 if the closing ']' is missing, the temporary copy cannot be
    *            allocated, or inet_pton rejects the address,
    *          0 on success; *next then points past the ']'.
    *
    * On success rep->host receives the address WITHOUT the brackets, i.e.
    * exactly the text inet_pton accepted, and URI_FLAGS_HOST_IPv6 is set.
    * (The previous code copied from '[' up to but excluding ']', which
    * stored an unbalanced string such as "[::1".)
    */
   struct in6_addr addr;
   char *dup, *start;
   int rt;

   if (*ptr != '[') {
     return 1;
   }

   start = ptr + 1;   // first character after '['
   for (ptr = start; *ptr != ']' && *ptr != '\0'; ++ptr);
   if (*ptr != ']') {
     return -1;
   }

   // inet_pton needs a NUL-terminated string, so work on a temporary copy
   dup = strdup_range( start, ptr );
   if (!dup) {
     return -1;
   }
   rt = inet_pton( AF_INET6, dup, &addr );
   free( dup );
   if (rt != 1) {
     return -1;
   }

   ctx->rep->flags |= URI_FLAGS_HOST_IPv6;
   ctx->rep->host = ctx_copy_string_raw( ctx, start, ptr );

   *next = ptr + 1;
   return 0;
}
M_INLINE int parse_net_path ( URIPARSECTX *  ctx,
char *  ptr,
char **  next 
)

Definition at line 428 of file uri.c.

{
   /*
    * Parse what follows "//": an authority and, if the authority is
    * directly followed by '/', an absolute path.
    *
    * Returns the result of parse_abs_path when a path is parsed and 0 in
    * every other case.
    *
    * NOTE(review): a failing parse_authority also ends in "return 0", so
    * the failure is not reported to the caller - confirm this is intended.
    */
   if (parse_authority( ctx, ptr, next ) != 0) {
      return 0;
   }

   ptr = *next;
   if (*ptr != '/') {
      return 0;
   }
   return parse_abs_path( ctx, ptr, next );
}
int parse_opaque_part ( URIPARSECTX *  ctx,
char *  ptr,
char **  next 
)

Definition at line 453 of file uri.c.

{
   /*
    * Parse an opaque part: one character that is not '/' (see
    * parse_uric_no_slash) followed by any number of uric characters.
    *
    * Returns 0 on success; the non-zero result of parse_uric_no_slash if
    * the first character is rejected; -1 if a later escape is malformed.
    * On success rep->opaque_raw / rep->opaque receive the raw and decoded
    * text, URI_FLAGS_IS_OPAQUE is set and *next points past the part.
    *
    * Fixes two defects of the previous version:
    *  - the sequence parse restarted at the first character, so that
    *    character was stored twice in the decoded string;
    *  - the raw copy used [start, start), i.e. it was always empty.
    */
   char *start = ptr;
   int rt;

   rt = parse_uric_no_slash( ctx, ptr, next );
   if (rt != 0) {
     return rt;
   }

   // continue behind the character that was just consumed
   ptr = *next;
   if (parse_uric_sequence( ctx, ptr, next ) < 0) {
     return -1;
   }
   ptr = *next;

   ctx->rep->flags |= URI_FLAGS_IS_OPAQUE;
   ctx->rep->opaque_raw = ctx_copy_string_raw( ctx, start, ptr );
   ctx->rep->opaque = ctx_finish_escaped_string( ctx );

   return 0;
}
M_INLINE int parse_path_segments ( URIPARSECTX *  ctx,
char *  ptr,
char **  next 
)

Definition at line 168 of file uri.c.

                                                                             {
  /*
   * Parse one segment followed by any number of "/segment" repetitions.
   * Every '/' separator is appended to the decoded buffer.
   *
   * Returns 0 when the first character after a segment is not '/',
   * -1 as soon as a segment fails to parse.
   */
  int rc = parse_segment( ctx, ptr, next );

  while (rc >= 0) {
    ptr = *next;
    if (*ptr != '/') {
      return 0;
    }
    ctx_add_escaped_char( ctx, '/', 0 );
    ++ptr;
    *next = ptr;
    rc = parse_segment( ctx, ptr, next );
  }
  return -1;
}
M_INLINE int parse_pchar ( URIPARSECTX *  ctx,
char *  ptr,
char **  next 
)

Definition at line 127 of file uri.c.

{
  /*
   * Parse a single path character: an unreserved character, one of
   * : @ & = + $ ,  or a %XX escape.
   *
   * A literal character is appended to the decoded buffer, *next is set
   * behind it and 0 is returned. Anything else is handed to
   * parse_escaped, whose result is returned unchanged.
   */
  char ch = *ptr;
  int literal = is_unreserved( ch );

  if (!literal) {
    switch (ch) {
    case ':':
    case '@':
    case '&':
    case '=':
    case '+':
    case '$':
    case ',':
      literal = 1;
      break;
    default:
      break;
    }
  }

  if (!literal) {
    return parse_escaped( ctx, ptr, next );
  }

  ctx_add_escaped_char( ctx, ch, 0 );
  *next = ptr + 1;
  return 0;
}
M_INLINE int parse_pchar_sequence ( URIPARSECTX *  ctx,
char *  ptr,
char **  next 
)

Definition at line 139 of file uri.c.

{
  /*
   * Consume as many path characters as possible, starting at ptr.
   *
   * Returns 0 when the sequence ends at a character that is not a path
   * character, and -1 when it ends at a malformed %XX escape. The previous
   * version always returned 0, so the "< 0" checks of its callers could
   * never fire and malformed escapes in a path went unnoticed.
   *
   * *next always points to the first character that was not consumed,
   * also when nothing was consumed at all.
   */
  int rt;

  while ( (rt = parse_pchar( ctx, ptr, next )) == 0 ) {
    ptr = *next;
  }
  *next = ptr;

  return rt < 0 ? -1 : 0;
}
M_INLINE int parse_scheme ( URIPARSECTX *  ctx,
char *  line,
char **  next 
)

Definition at line 387 of file uri.c.

{
    /*
     * Parse a scheme: a letter followed by letters, digits, '+', '-' or
     * '.', terminated by ':'.
     *
     * On success the scheme (without the ':') is stored in rep->scheme,
     * URI_FLAGS_HAS_SCHEME is set, *next points behind the ':' and 0 is
     * returned. Returns -1 otherwise, leaving *next and rep untouched.
     */
    char *cur = line;

    if (!is_alpha( *cur )) {
      return -1;
    }

    do {
      ++cur;
    } while (is_alphanum( *cur ) || *cur == '+' || *cur == '-' || *cur == '.');

    if (*cur != ':') {
      return -1;
    }

    ctx->rep->flags |= URI_FLAGS_HAS_SCHEME;
    ctx->rep->scheme = ctx_copy_string_raw( ctx, line, cur );
    *next = cur + 1;
    return 0;
}
M_INLINE int parse_segment ( URIPARSECTX *  ctx,
char *  ptr,
char **  next 
)

Definition at line 150 of file uri.c.

{
  /*
   * Parse one path segment: a run of path characters followed by any
   * number of ";param" parts. Every ';' is appended to the decoded buffer.
   *
   * The previous version accepted a single ";param" only, so a segment
   * like "a;b;c" stopped in front of the second ';'. RFC 2396 defines
   *   segment = *pchar *( ";" param )
   *
   * Returns 0 on success, -1 if a character run fails to parse. *next
   * points to the first character that does not belong to the segment.
   */
  if (parse_pchar_sequence( ctx, ptr, next ) < 0) {
    return -1;
  }
  ptr = *next;

  while (*ptr == ';') {
    ctx_add_escaped_char( ctx, ';', 0 );
    ptr = *next = ptr + 1;
    if (parse_pchar_sequence( ctx, ptr, next ) < 0) {
      return -1;
    }
    ptr = *next;
  }
  return 0;
}
M_INLINE int parse_server ( URIPARSECTX *  ctx,
char *  ptr,
char **  next 
)

Definition at line 364 of file uri.c.

{
  /*
   * Parse "[ userinfo '@' ] hostport".
   *
   * A negative result of parse_userinfo aborts with -1. Any other result
   * continues with parse_hostport at the position parse_userinfo left in
   * *next; the result of parse_hostport is returned.
   */
  if (parse_userinfo( ctx, ptr, next ) < 0) {
    return -1;
  }
  return parse_hostport( ctx, *next, next );
}
M_INLINE int parse_uric ( URIPARSECTX *  ctx,
char *  ptr,
char **  next 
)

Definition at line 104 of file uri.c.

{
   /*
    * Parse a single uric character: a reserved character, an unreserved
    * character, or a %XX escape.
    *
    * A literal character is appended to the decoded buffer, *next is set
    * behind it and 0 is returned; everything else is delegated to
    * parse_escaped, whose result is returned unchanged.
    */
   char ch = *ptr;

   if (!is_reserved( ch ) && !is_unreserved( ch )) {
     return parse_escaped( ctx, ptr, next );
   }

   ctx_add_escaped_char( ctx, ch, 0 );
   *next = ptr + 1;
   return 0;
}
M_INLINE int parse_uric_no_slash ( URIPARSECTX *  ctx,
char *  ptr,
char **  next 
)

Definition at line 440 of file uri.c.

{
   /*
    * Parse a single uric character other than '/': an unreserved
    * character, one of  ; ? : @ & = + $ ,  or a %XX escape.
    *
    * A literal character is appended to the decoded buffer, *next is set
    * behind it and 0 is returned; everything else is delegated to
    * parse_escaped, whose result is returned unchanged.
    */
   char ch = *ptr;
   int literal = is_unreserved( ch );

   if (!literal) {
     switch (ch) {
     case ';':
     case '?':
     case ':':
     case '@':
     case '&':
     case '=':
     case '+':
     case '$':
     case ',':
       literal = 1;
       break;
     default:
       break;
     }
   }

   if (!literal) {
     return parse_escaped( ctx, ptr, next );
   }

   ctx_add_escaped_char( ctx, ch, 0 );
   *next = ptr + 1;
   return 0;
}
M_INLINE int parse_uric_sequence ( URIPARSECTX *  ctx,
char *  ptr,
char **  next 
)

Definition at line 115 of file uri.c.

{
  /*
   * Consume as many uric characters as possible, starting at ptr.
   *
   * The return value is the result of the parse_uric call that ended the
   * loop and is therefore never 0:
   *    1  the sequence ended at a character that is not a uric character,
   *   -1  the sequence ended at a malformed %XX escape.
   *
   * *next always points to the first character that was not consumed.
   * Previously *next kept whatever value the caller had left in it when
   * not a single character was consumed, which made callers compute a
   * wrong (even negative) length for empty sequences.
   */
  int rt;

  while ( (rt = parse_uric( ctx, ptr, next )) == 0 ) {
    ptr = *next;
  }
  *next = ptr;

  return rt;
}
M_INLINE int parse_userinfo ( URIPARSECTX *  ctx,
char *  ptr,
char **  next 
)

Definition at line 213 of file uri.c.

{
    /*
     * Try to parse "userinfo '@'" at ptr. Allowed are unreserved
     * characters, the characters  ; : & = + $ ,  and %XX escapes.
     *
     * Returns 0 if an '@' was reached: rep->userinfo_raw / rep->userinfo
     * are set and *next points behind the '@'.
     * Otherwise the characters collected so far are discarded, *next is
     * reset to ptr and the non-zero result of parse_escaped is returned
     * (1: a character that cannot be part of userinfo, -1: malformed escape).
     */
    char *cur = ptr;
    int rt;

    while (*cur != '@') {
       int literal = is_unreserved( *cur );

       if (!literal) {
         switch (*cur) {
         case ';':
         case ':':
         case '&':
         case '=':
         case '+':
         case '$':
         case ',':
           literal = 1;
           break;
         default:
           break;
         }
       }

       if (literal) {
         ctx_add_escaped_char( ctx, *cur, 0 );
         ++cur;
         continue;
       }

       rt = parse_escaped( ctx, cur, next );
       if (rt != 0) {
         // not a userinfo after all: roll back and let the caller retry at ptr
         ctx_undo_escaped_string( ctx );
         *next = ptr;
         return rt;
       }
       cur = *next;
    }

    ctx->rep->userinfo_raw = ctx_copy_string_raw( ctx, ptr, cur );
    ctx->rep->userinfo = ctx_finish_escaped_string( ctx );
    *next = cur + 1;

    return 0;
}