Why Gemfury? Push, build, and install  RubyGems npm packages Python packages Maven artifacts PHP packages Go Modules Debian packages RPM packages NuGet packages

Repository URL to install this package:

Details    
Size: Mime:
/*
 *      Interactive disassembler (IDA).
 *      Copyright (c) 1990-2024 Hex-Rays
 *      ALL RIGHTS RESERVED.
 *
 */

#ifndef LEX_HPP
#define LEX_HPP

#include <ieee.h>

/*! \file lex.hpp

  \brief Tools for parsing C-like input

  Functions in this file use objects of opaque type lexer_t.
  To create a lexer_t instance, use create_lexer().
*/

typedef ushort lxtype;  ///< see \ref lx_

/// \defgroup lx_ Parser token types
/// See token_t::type
/// \note All separators have their ASCII codes as lxtype
///@{
const lxtype
  lx_end      = 1,      ///< no more tokens
  lx_ident    = 2,      ///< ident
  lx_number   = 3,      ///< long constant
  lx_string   = 4,      ///< string constant (token_t.chr != 0 => unicode string)
  lx_char     = 5,      ///< char constant
  lx_typename = 6,      ///< user-defined type
  lx_float    = 7,      ///< IEEE floating point constant
  lx_int64    = 8,      ///< int64 constant
  lx_key      = 128;    ///< keywords start. All keys are lx_key + keynum. \n
                        ///< Two-char separators are: (c1 + (c2 << 8)).    \n
                        ///< Three-char separators:
                        ///<   - "<<=" = ('<' + ('<'<<8)) + '='
                        ///<   - ">>=" = ('>' + ('>'<<8)) + '='
///@}


/// Parser token
struct token_t
{
  qstring str;          ///< idents & strings
  lxtype type = 0;      ///< see \ref lx_
  sval_t num = 0;       ///< long & char constants
  union
  {
    bool unicode = false; ///< (::lx_string: != 0 => unicode string)
    bool is_unsigned;     ///< (::lx_number, ::lx_int64: != 0 => unsigned value)
  };
  union
  {
    fpvalue_t fnum;     ///< floating point constant
    int64 i64;          ///< ::lx_int64
  };
  token_t() : fnum() {}
};
DECLARE_TYPE_AS_MOVABLE(token_t);

class lexer_t;          // lexical analyzer, opaque structure


/// Preprocessor callback for unknown tokens.
/// Will be called when preprocessor calculates the value of #if expression.

typedef error_t lx_resolver_t(lexer_t *lx, void *ud, token_t *curtok, sval_t *res);

// Conversion from 'type1 ' to 'type_2' is sign-extended. This may cause unexpected runtime behavior.
// We want this sign-extension to happen, since it comes mostly from HANDLEs.
// (see https://msdn.microsoft.com/en-us/library/ms235307.aspx )
MSC_DIAG_OFF(4826)

/// Preprocessor cast
struct cast_t
{
  bool is_unsigned;
  int size;

  cast_t()
  {
    reset();
  }
  void reset(void)
  {
    is_unsigned = false;
    size = 0;
  }
};

struct lex_value_t
{
  bool is_unsigned;
  union
  {
    int64 val;
    uint64 uval;
  };

  lex_value_t()
  {
    reset();
  }
  void set(const lex_value_t &v)
  {
    set_val(v.val, v.is_unsigned);
  }
  void reset(void)
  {
    set_val(0, true);
  }
  void set_val(int64 v, bool _is_unsigned)
  {
    is_unsigned = _is_unsigned;
    val = v;
  }

  uint64 get_uval(void) const
  {
    return val;
  }
  int64 get_val(void) const
  {
    return val;
  }

  bool is_zero(void) const
  {
    return get_val() == 0;
  }

  void perform_cast(const cast_t &cast);

  void unary_minus(const lex_value_t &v);
  void unary_plus(const lex_value_t &v);
  void unary_not(const lex_value_t &v);
  void bitwise_not(const lex_value_t &v);

  void mul(const lex_value_t &v);
  void div(const lex_value_t &v);
  void mod(const lex_value_t &v);
  void add(const lex_value_t &v);
  void sub(const lex_value_t &v);

  void shift_right(const lex_value_t &v);
  void shift_left(const lex_value_t &v);
  void bitwise_and(const lex_value_t &v);
  void bitwise_xor(const lex_value_t &v);
  void bitwise_or(const lex_value_t &v);
  void logical_and(const lex_value_t &v);
  void logical_or(const lex_value_t &v);

  void cmpge(const lex_value_t &v);
  void cmple(const lex_value_t &v);
  void cmplt(const lex_value_t &v);
  void cmpgt(const lex_value_t &v);
  void cmpeq(const lex_value_t &v);
  void cmpneq(const lex_value_t &v);
};

MSC_DIAG_ON(4826)

/// Preprocessor callbacks for casts.
/// Will be called when preprocessor calculates the value of #if expression.

typedef error_t lx_parse_cast_t(lexer_t *lx, cast_t *cast, token_t *ct);

/// Preprocessor callback.
/// It will be called for each input line.
/// \return an error code (0-ok)

typedef int idaapi lx_preprocessor_cb(void *ud, const char *fname, int nl, const char *line);


/// Callback for #pragma directives.
/// \return an error code (0-ok)

typedef int idaapi lx_pragma_cb(void *ud, const char *line);


/// Callback for #warning directives.
/// \return an error code (0-ok)

typedef int idaapi lx_warning_cb(void *ud, const char *line);


/// Callback for #define directives
/// \return an error code (0-ok)

typedef int idaapi lx_macro_cb(
        void *ud,
        const char *name,
        const char *body,
        int nargs,
        bool isfunc,
        bool is_new_macro);


/// Callback for #undef directives
/// \return an error code (0-ok)

typedef int idaapi lx_undef_cb(void *ud, const char *name);

#define LEX_MACRO_SYSTEM 1 ///< define system macros such as `__LINUX__`

/// Create new lexical analyzer and set its keyword table.
/// If keys==nullptr, then set the default C keyword table

idaman lexer_t *ida_export create_lexer(
        const char *const *keys,
        size_t size,
        void *ud=nullptr,
        uint32 macro_flags=0);


/// Destroy a lexical analyzer

idaman void ida_export destroy_lexer(lexer_t *lx);


/// Define a macro

idaman error_t ida_export lex_define_macro(
        lexer_t *lx,
        const char *macro,
        const char *body,
        int nargs=0,
        bool isfunc=false);

/// Undefine a macro
/// \param lx the lexer object
/// \param macro macro to undefine

idaman void ida_export lex_undefine_macro(
        lexer_t *lx,
        const char *macro);

/// Set lexer options.
/// \param lx the lexer object
/// \param options  \ref LXOPT_
/// \return the old options

idaman int ida_export lex_set_options(lexer_t *lx, int options);

/// \defgroup LXOPT_ Lexer options
/// Passed as 'options' parameter to lex_set_options().
/// By default all options are on.
///@{
#define LXOPT_PARSE_FLOATS  0x0001 ///< enable floating point constants
#define LXOPT_REQ_SEPARATOR 0x0002 ///< require a separator between a number and an ident or a character/string constant or dot
#define LXOPT_NOCASE_FILES  0x0004 ///< case-insensitive file search
#define LXOPT_C99_CONSTANTS 0x0008 ///< the size and sign of constants depend on the value itself and the 'U', 'L', and 'LL'
                                   ///< modifier suffixes. Otherwise the constant is always considered as signed and the size
                                   ///< depends only on the number of bytes in the value
///@}


/// Get next token
/// \param lx the lexer object
/// \param t the output buffer for the token
/// \param p_lnnum the line number where the token starts

idaman error_t ida_export lex_get_token(lexer_t *lx, token_t *t, int32 *p_lnnum=nullptr);


/// Enumerate all macros.
/// Do so until 'cb' returns non-zero.

idaman int ida_export lex_enum_macros(
        const lexer_t *lx,
        int idaapi cb(const char *name, const char *body, int nargs, bool isfunc, void *ud),
        void *ud=nullptr);


/// Debug: get text representation of token

idaman const char *ida_export lex_print_token(qstring *buf, const token_t *t);


//-------------------------------------------------------------------------
/// \name String oriented functions
///@{

/// Set the input line and the macro table.
/// if macros==nullptr, the macro table will not be changed.

idaman error_t ida_export lex_init_string(
        lexer_t *lx,
        const char *line,
        void *macros=nullptr);
///@}

//-------------------------------------------------------------------------
/// \name File oriented functions
///@{

/// Initialization: file may be nullptr.
/// Also see lex_term_file().

idaman error_t ida_export lex_init_file(lexer_t *lx, const char *file);


/// Error handling.
/// if level > 0, then return information about the enclosing file which
/// included the current one.

idaman const char *ida_export lex_get_file_line(
        lexer_t *lx,
        int32 *linenum,
        const char **lineptr,
        int level=0);


/// Termination: also see lex_init_file()

idaman void ida_export lex_term_file(lexer_t *lx, bool del_macros);
///@}

//-------------------------------------------------------------------------
/// \name Token stack
///@{
typedef qstack<token_t> tokenstack_t; ///< see get_token(), unget_token()


/// Retrieve token from a stack or lexer.
/// If buf is not empty then get the token on top of the stack.
/// If buf is empty then gen the next token from the lexer.
/// \return success

inline bool get_token(token_t *t, lexer_t *lx, tokenstack_t &buf)
{
  if ( !buf.empty() )
    *t = buf.pop();
  else if ( lex_get_token(lx, t) != eOk )
    return false;
  return true;
}


/// Push a token back onto the token stack

inline void unget_token(const token_t &t, tokenstack_t &buf)
{
  buf.push(t);
}
///@}


/// is NAME a valid C keyword (exclude 'this')

idaman bool ida_export is_c_keyword(const char *name);



#endif  //  LEX_HPP