Skip to content
Permalink
master
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
/*
* Copyright 2016-2023, Cypress Semiconductor Corporation (an Infineon company) or
* an affiliate of Cypress Semiconductor Corporation. All rights reserved.
*
* This software, including source code, documentation and related
* materials ("Software") is owned by Cypress Semiconductor Corporation
* or one of its affiliates ("Cypress") and is protected by and subject to
* worldwide patent protection (United States and foreign),
* United States copyright laws and international treaty provisions.
* Therefore, you may use this Software only as provided in the license
* agreement accompanying the software package from which you
* obtained this Software ("EULA").
* If no EULA applies, Cypress hereby grants you a personal, non-exclusive,
* non-transferable license to copy, modify, and compile the Software
* source code solely for use in connection with Cypress's
* integrated circuit products. Any reproduction, modification, translation,
* compilation, or representation of this Software except as specified
* above is prohibited without the express written permission of Cypress.
*
* Disclaimer: THIS SOFTWARE IS PROVIDED AS-IS, WITH NO WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, NONINFRINGEMENT, IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. Cypress
* reserves the right to make changes to the Software without notice. Cypress
* does not assume any liability arising out of the application or use of the
* Software or any product or circuit described in the Software. Cypress does
* not authorize its products for use in any products where a malfunction or
* failure of the Cypress product may reasonably be expected to result in
* significant property damage, injury or death ("High Risk Product"). By
* including Cypress's product in a High Risk Product, the manufacturer
* of such system or application assumes all risk of such use and in doing
* so agrees to indemnify Cypress against all liability.
*/
/*****************************************************************************
**
** Name: xml_parse.c
**
** File: XML Parser
**
**
*****************************************************************************/
/* The XML Parser is dependent on the Object Store. At present
** the object store resides in GOEP and hence the parser is
** dependent on GOEP. The parser only uses the Object Store
** in GOEP, so if the Object Store is separated from GOEP in the
** future, the parser will not be dependent on GOEP.
*/
#include <stdlib.h>
#include <string.h>
#include "xml_pars_api.h"
#include "wiced_bt_utils.h"
/* Tracing is off unless the build defines BIP_TRACE_XML as TRUE. */
#ifndef BIP_TRACE_XML
#define BIP_TRACE_XML FALSE
#endif
#if (defined(BIP_TRACE_XML) && BIP_TRACE_XML == TRUE)
/* Tracing enabled: XML_TRACE_DEBUGn forwards a message and n parameters to
** BT_TRACE_n on the GOEP layer at debug level. Each macro expands to a
** braced block, so it must be used as a stand-alone statement.
*/
#define XML_TRACE_DEBUG0(m) {BT_TRACE_0(TRACE_LAYER_GOEP, TRACE_TYPE_DEBUG, m);}
#define XML_TRACE_DEBUG1(m,p1) {BT_TRACE_1(TRACE_LAYER_GOEP, TRACE_TYPE_DEBUG, m,p1);}
#define XML_TRACE_DEBUG2(m,p1,p2) {BT_TRACE_2(TRACE_LAYER_GOEP, TRACE_TYPE_DEBUG, m,p1,p2);}
#define XML_TRACE_DEBUG3(m,p1,p2,p3) {BT_TRACE_3(TRACE_LAYER_GOEP, TRACE_TYPE_DEBUG, m,p1,p2,p3);}
#define XML_TRACE_DEBUG4(m,p1,p2,p3,p4) {BT_TRACE_4(TRACE_LAYER_GOEP, TRACE_TYPE_DEBUG, m,p1,p2,p3,p4);}
#define XML_TRACE_DEBUG5(m,p1,p2,p3,p4,p5) {BT_TRACE_5(TRACE_LAYER_GOEP, TRACE_TYPE_DEBUG, m,p1,p2,p3,p4,p5);}
#define XML_TRACE_DEBUG6(m,p1,p2,p3,p4,p5,p6) {BT_TRACE_6(TRACE_LAYER_GOEP, TRACE_TYPE_DEBUG, m,p1,p2,p3,p4,p5,p6);}
#else
/* Tracing disabled: every trace macro expands to nothing, so its arguments
** are never evaluated.
*/
#define XML_TRACE_DEBUG0(m)
#define XML_TRACE_DEBUG1(m,p1)
#define XML_TRACE_DEBUG2(m,p1,p2)
#define XML_TRACE_DEBUG3(m,p1,p2,p3)
#define XML_TRACE_DEBUG4(m,p1,p2,p3,p4)
#define XML_TRACE_DEBUG5(m,p1,p2,p3,p4,p5)
#define XML_TRACE_DEBUG6(m,p1,p2,p3,p4,p5,p6)
#endif
/*****************************************************************************
** Constants
*****************************************************************************/
/* Single characters the parser compares p_st->next_token against. */
#define XML_ST '<'
#define XML_GT '>'
#define XML_QM '?'
#define XML_EX '!'
#define XML_EM '/' /* End Mark */
#define XML_CO ':'
#define XML_EQ '='
#define XML_SQ '\''
#define XML_DQ '"'
#define XML_AM '&'
#define XML_SC ';'
#define XML_PD '#'
#define XML_HX 'x'
#define XML_HY '-'
#define XML_LB '['
/* Names of the predefined entity references, without the surrounding
** '&' and ';' (compared in xml_resolve_refs).
*/
#define XML_LT_STR "lt"
#define XML_GT_STR "gt"
#define XML_AMP_STR "amp"
#define XML_APOS_STR "apos"
#define XML_QUOT_STR "quot"
/* Multi-character markers. NOTE(review): their users are not visible in this
** part of the file; the names suggest "<?" end, comment start/end, CDATA
** start/end and doctype delimiters - confirm against the callers.
*/
#define XML_QTAG_END_STR "?>"
#define XML_COMM_STR "--"
#define XML_COMM_END_STR "-->"
#define XML_CDS_STR "[CDATA["
#define XML_CDS_END_STR "]]>"
#define XML_DOCT_STR "<'\""
/* Characters that end a name in xml_name. The leading ':' is the prefix
** separator; xml_name passes xml_name_srch + 1 to skip it once a prefix has
** been consumed.
*/
static const UINT8 xml_name_srch[] = ":=/> \t\n\r";
/*****************************************************************************
** Type Definitions
*****************************************************************************/
/* White space handling requested from xml_get_next when it reads a char. */
enum
{
XML_PASS_WS, /* hand white space through (only CR is mapped to LF) */
XML_SKIP_WS, /* keep reading while the char read is white space */
XML_NORM_WS /* additionally map LF (0xA) and TAB (0x9) to a space (0x20) */
};
typedef UINT16 tXML_WS_OP;
/*****************************************************************************
** Globals
**
** The global below is used as the buffer set (tXML_BFR_SET) in a local
** variable (of type tXML_MUL_STATE) in XML_Parse. The buffer set memory, is
** separated from the rest of tXML_MUL_STATE to make it easy to change the
** allocation of its memory if found necessary. See xml_alloc_bfr_set.
*****************************************************************************/
/*****************************************************************************
** Macro Functions
*****************************************************************************/
#define XML_EOS(p_st) ((p_st)->curr_res <= 0) /* End Of Store */
/* white space: " ", \t, \r, \n */
#define XML_IS_WS(c) (((c) == 0x20) || ((c) == 0x9) || \
((c) == 0xD) || ((c) == 0xA) || \
((c) == 0x00) )
/*****************************************************************************
** Function Prototypes
*****************************************************************************/
/* low level helpers: character reading, searching, result bookkeeping */
static BOOLEAN xml_get_next(tXML_MUL_STATE *, tXML_WS_OP);
static BOOLEAN xml_find_ch(tXML_MUL_STATE *, UINT8, tXML_WS_OP);
static void xml_incr_pars_res(tXML_MUL_STATE *, tXML_RESULT);
static void xml_set_bfr(tXML_MUL_STATE *, UINT8);
/* parsing static functions */
static BOOLEAN xml_elems(tXML_MUL_STATE *, BOOLEAN);
static BOOLEAN xml_qm_elem(tXML_MUL_STATE *);
static BOOLEAN xml_ex_elem(tXML_MUL_STATE *, BOOLEAN);
static BOOLEAN xml_tag_elem(tXML_MUL_STATE *);
static BOOLEAN xml_etag_elem(tXML_MUL_STATE *);
/* Selector values for the second argument of xml_set_bfr: stop collecting,
** collect into the name string, or collect into the value string.
*/
#define XML_SET_CLEAR 0
#define XML_SET_NAME 1
#define XML_SET_VALUE 2
/*****************************************************************************
** API Functions
*****************************************************************************/
void XML_InitPars(tXML_MUL_STATE *p_st, tXML_CBACK xml_cback, void *p_usr_data)
{
    /* Start from an all-zero state. Every pointer, length, flag and result
     * field (p_data_bfr, next_token, curr_res, pars_res, skip_next_nl, the
     * prefix/name/value strings and status) therefore begins as 0 / NULL;
     * only the two caller-supplied fields need an explicit value.
     */
    memset(p_st, 0, sizeof(*p_st));

    p_st->p_usr_data = p_usr_data;
    p_st->cback      = xml_cback;
}
/*****************************************************************************
**
** Function XML_MulParse
**
** Description
** The current implementation of the xml_pars_api supports only those
** XML-constructs needed in BPP SOAP-messages. The parser must have a
** small footprint and is therefore small and simple.
**
** According to SOAP a message must not contain the doctypedecl construct
** (production) and it must not contain Processing Instructions (PI
** production), i.e. these constructs are not supported. In addition,
** CDATA sections, any external or internal entities and the XML
** Declaration are not supported (not used in BPP). Should any of these
** be included in a message being parsed, they will be reported returning
** a warning code. The parser will then try to find the next tag.
** When the parser reports an XML-event using the callback it will always
** continue, even if the callback returns false. All strings in event
** data passed with the callback are limited to 64 bytes in size, except
** the prefix string which has 32 as max size. Consecutive XML_CHARDATA
** events are not supported. Leading and trailing white space is removed
** from the value string before sending the XML_CHARDATA event.
**
** This function and also all other helping static parsing functions use
** more than one return statement in a function. The reason is that
** a parse error has been found and to exit as soon as possible.
** If one had used only one return in each function, the path
** representing a correct xml syntax had been expressed with very deeply
** nested if-statements.
**
** Parameters
** see h-file
** Returns
** see h-file
*****************************************************************************/
tXML_RESULT XML_MulParse(tXML_MUL_STATE *p_st, tXML_OS *p_os)
{
/* Overview of one call:
** 1. reset the per-call result and string lengths and take a copy of p_os;
** 2. position the read pointer - on the first call skip ahead to the first
** ordinary start tag, on later calls resume from saved partial data
** or from the beginning of the new chunk;
** 3. parse the first tag (if still pending) and then the element content;
** 4. if parsing stopped before the document was complete, report
** XML_PARTIAL through the callback and save the unparsed tail.
** The value returned is always p_st->pars_res.
*/
BOOLEAN found;
BOOLEAN query, partial = FALSE;
BOOLEAN parse_ok = TRUE;
int keep_size;
int buffer_used;
tXML_RESULT res = XML_SUCCESS;
tXML_RESULT old_pars_res;
p_st->curr_res = 1; /* not EOS */
memcpy(&p_st->xml_os, p_os, sizeof(tXML_OS));
/* remember the result of the previous call before resetting it */
old_pars_res = p_st->pars_res;
p_st->pars_res = XML_SUCCESS;
p_st->prefix.len = 0;
p_st->name.len = 0;
p_st->value.len = 0;
p_st->p_last_stm = 0;
p_st->p_copy = 0;
#if ((defined (BIP_TRACE_XML) && BIP_TRACE_XML == TRUE) || (defined FOLDER_DEBUG_XML && FOLDER_DEBUG_XML== TRUE))
XML_TRACE_DEBUG4("XML_MulParse status:%d, pars_res: %d, begin:%d, end:%d",
p_st->status, old_pars_res, p_os->p_begin, p_os->p_end);
#endif
/* First call for this document: start at the beginning of the chunk and
** skip everything up to the first ordinary start tag. */
if (p_st->status == XML_STS_INIT)
{
p_st->p_cur = p_os->p_begin;
#if ((defined (BIP_TRACE_XML) && BIP_TRACE_XML == TRUE) || (defined FOLDER_DEBUG_XML && FOLDER_DEBUG_XML== TRUE))
XML_TRACE_DEBUG1("p_cur:%d", p_st->p_cur);
#endif
/* the do-while(0) only exists so that "break" can leave this section early */
do
{
if (!xml_get_next(p_st, XML_PASS_WS)) /* obj store empty or err */
{
/* NOTE(review): res is only tested below (res == XML_SUCCESS); the function
** returns p_st->pars_res, so XML_OBJ_ST_EMPTY itself is never returned. */
res = XML_OBJ_ST_EMPTY;
break;
}
found = FALSE;
while (!XML_EOS(p_st) && !found)
{ /* skip all but top element */
if (!xml_find_ch(p_st, XML_ST, XML_PASS_WS) ||
!xml_get_next(p_st, XML_PASS_WS))
{
res = XML_ERR;
break;
}
/* next_token is now the character following '<' */
if (p_st->next_token == XML_QM)
{
parse_ok = xml_qm_elem(p_st);
}
else if (p_st->next_token == XML_EX)
{
parse_ok = xml_ex_elem(p_st, TRUE);
}
else if (p_st->next_token == XML_EM)
{
/* an end tag before any start tag is counted as an error */
parse_ok = FALSE;
if (!xml_get_next(p_st, XML_PASS_WS))
{
res = XML_ERR;
break;
}
}
else
{
/* first character of an ordinary tag name */
found = TRUE;
parse_ok = TRUE;
}
if (!parse_ok)
xml_incr_pars_res(p_st, XML_ERR);
}
} while (0);
/* the status advances even when the search above failed */
p_st->status = XML_STS_1STM;
}
else if (old_pars_res == XML_NO_PROP)
{
/* previous call stopped because a callback returned FALSE: nothing is
** re-positioned here, parsing resumes with p_cur/next_token as left */
}
else
{
#if ((defined (BIP_TRACE_XML) && BIP_TRACE_XML == TRUE) || (defined FOLDER_DEBUG_XML && FOLDER_DEBUG_XML== TRUE))
XML_TRACE_DEBUG2("p_st->last_bfr.p:%d, p_st->used_last_bfr:%d",
p_st->last_bfr.p, p_st->used_last_bfr);
#endif
/* if there was some data left, read it here. */
if (p_st->partial_st.used_last_bfr )
{
/* move the saved tail into last_bfr; xml_read_char consumes last_bfr
** before it switches over to the new chunk */
memcpy(p_st->last_bfr.p, p_st->partial_st.last_bfr.p, p_st->partial_st.used_last_bfr);
p_st->used_last_bfr = p_st->partial_st.used_last_bfr;
p_st->last_bfr.p[p_st->partial_st.used_last_bfr] = 0;
p_st->event_data.part.parse = p_st->partial_st.event_data.part.parse;
/* set length to 0 */
p_st->partial_st.used_last_bfr = 0;
XML_TRACE_DEBUG1("retrieved PARTIAL data = [%s]\n", p_st->last_bfr.p);
p_st->p_cur = p_st->last_bfr.p;
/* continuation packet */
/* read a ch, setup xml_set_bfr */
xml_get_next(p_st, XML_PASS_WS);
/* tell the user which data the parser is now reading from */
p_st->event_data.copy.p_begin = p_st->xml_os.p_begin;
p_st->event_data.copy.last.p = p_st->last_bfr.p;
p_st->event_data.copy.last.len = p_st->used_last_bfr;
p_st->cback(XML_COPY, &(p_st->event_data), p_st->p_usr_data);
}
else
{
if (p_st->used_last_bfr == 0)
{
/* nothing saved: simply start at the beginning of the new chunk */
p_st->p_cur = p_os->p_begin;
xml_get_next(p_st, XML_PASS_WS);
}
else
/* last_bfr is marked as in use but no partial data was saved */
return XML_NO_MEM;
}
#if ((defined (BIP_TRACE_XML) && BIP_TRACE_XML == TRUE) || (defined FOLDER_DEBUG_XML && FOLDER_DEBUG_XML== TRUE))
XML_TRACE_DEBUG1("p_st->p_cur:%d", p_st->p_cur);
#endif
}
XML_TRACE_DEBUG0("XML_MulParse end while");
if (res == XML_SUCCESS)
{
/* here we found "<(a-z)" */
if (!XML_EOS(p_st))
{
if (p_st->status == XML_STS_1STM)
{
/* remember the beginning position right after '<' in the first line */
/* if the first line can't be parsed at first round, save it to the second parse */
p_st->p_copy = p_st->p_cur - 1;
parse_ok = xml_tag_elem(p_st);
}
/* parsed the first line */
XML_TRACE_DEBUG0("XML_MulParse exit xml_tag_elem");
if (!parse_ok)
{
/* ask the user (XML_QUERY) whether this failure is final */
query = p_st->cback(XML_QUERY, &(p_st->event_data), p_st->p_usr_data);
/* if first line parsing is not completed while reach the end of stack, ERROR occurs */
if (query == TRUE)
xml_incr_pars_res(p_st, XML_ERR);
else /* first line parsing to be continued, copy partial data at later point*/
partial = TRUE;
}
else /* first line is parsed ok, change parsing status */
p_st->status = XML_STS_1TAG;
if (!XML_EOS(p_st) && parse_ok)
{
parse_ok = xml_elems(p_st, parse_ok);
query = p_st->cback(XML_QUERY, &(p_st->event_data), p_st->p_usr_data);
/* either a parse failure or a FALSE answer to XML_QUERY means more
** data is expected; otherwise the document is done */
if (parse_ok == FALSE || query == FALSE)
{
partial = TRUE;
}
else
p_st->status = XML_STS_DONE;
}
/* copy partial data if any */
if (partial)
{
if(p_st->pars_res == XML_NO_PROP)
{
/* callback refused an event: rewind to the last remembered tag start
** and let the user keep the data from there; nothing is copied here */
p_st->p_cur = p_st->p_copy;
p_st->event_data.part.parse = p_st->pars_res;
p_st->event_data.part.p_keep = p_st->p_cur;
XML_TRACE_DEBUG1("p_st->p_cur:%d (last_stm)", p_st->p_cur);
p_st->cback(XML_PARTIAL, &(p_st->event_data), p_st->p_usr_data);
/* no effect on pars_res here: xml_incr_pars_res preserves XML_NO_PROP */
xml_incr_pars_res(p_st, XML_NO_END);
}
else
{
/* case 1: the unparsed tail starts inside the current chunk */
if( p_st->last_bfr.p &&
(p_st->p_copy > p_st->xml_os.p_begin) &&
(p_st->p_copy < p_st->xml_os.p_end) )
{
keep_size = p_st->xml_os.p_end - p_st->p_copy;
if (keep_size < p_st->last_bfr.len)
{
/* store the partial data to a temporary buffer,
NOT to the queue of buffers as it would overwrite current ones! */
XML_TRACE_DEBUG0("Store partial data\n");
/* NOTE(review): 512 is presumably the capacity of partial_st.last_bfr,
** while the size check above uses last_bfr.len - confirm they agree */
BCM_STRNCPY_S((char *)p_st->partial_st.last_bfr.p, 512, (char *)p_st->p_copy, keep_size);
p_st->partial_st.used_last_bfr= keep_size;
p_st->partial_st.last_bfr.p[keep_size] = 0;
p_st->partial_st.event_data.part.parse = p_st->pars_res;
p_st->partial_st.event_data.part.p_keep= p_st->last_bfr.p;
p_st->cback(XML_PARTIAL, &(p_st->event_data), p_st->p_usr_data);
xml_incr_pars_res(p_st, XML_NO_END);
}
/* NOTE(review): if the tail does not fit, nothing is saved or reported */
}
/* case 2: otherwise append the whole chunk to what is already saved */
else if (p_st->last_bfr.p)
{
keep_size = p_st->xml_os.p_end - p_st->xml_os.p_begin;
/* length of the string already held in the temporary buffer */
for (buffer_used = 0; buffer_used < 512; buffer_used++)
{
if (p_st->partial_st.last_bfr.p[buffer_used] == 0)
break;
}
XML_TRACE_DEBUG1("buffer_used %d", buffer_used);
if ((buffer_used < 512) && (keep_size + buffer_used < p_st->last_bfr.len))
{
/* store the partial data to a temporary buffer,
NOT to the queue of buffers as it would overwrite current ones! */
XML_TRACE_DEBUG0("Store partial data\n");
/* NOTE(review): the destination is offset by buffer_used but the bound
** passed is still 512 - verify against the semantics of BCM_STRNCPY_S */
BCM_STRNCPY_S((char *)p_st->partial_st.last_bfr.p + buffer_used, 512, (char *)p_st->xml_os.p_begin, keep_size);
p_st->partial_st.used_last_bfr= keep_size + buffer_used;
p_st->partial_st.last_bfr.p[keep_size + buffer_used] = 0;
p_st->partial_st.event_data.part.parse = p_st->pars_res;
p_st->partial_st.event_data.part.p_keep= p_st->last_bfr.p;
p_st->cback(XML_PARTIAL, &(p_st->event_data), p_st->p_usr_data);
xml_incr_pars_res(p_st, XML_NO_END);
}
else
{
XML_TRACE_DEBUG0("Insufficient temp buffer of size 512!!");
}
}
}/* else NO_PROP */
} /* end of partial */
} /* end of !XML_EOS(p_st) */
} /* end of res == XML_SUCCESS */
return p_st->pars_res;
}
/*****************************************************************************
** Static Functions
*****************************************************************************/
/*****************************************************************************
**
** Function xml_set_bfr
**
** Description
** Sets the buffer that is going to be used when tokens are pushed from
** p_st->next_token into some buffer in the buffer set.
**
** Parameters
** p_st (in/out) : the parser state
** set (in) : XML_SET_NAME or XML_SET_VALUE to start collecting at the
** current token into the name or value string respectively;
** any other value (e.g. XML_SET_CLEAR) stops collecting.
**
** Returns
** -
*****************************************************************************/
static void xml_set_bfr(tXML_MUL_STATE *p_st, UINT8 set)
{
    /* The character currently held in next_token was read from p_cur - 1,
     * so a freshly selected string starts there with a length of zero;
     * xml_write_bfr then grows the length one character at a time.
     */
    if (set == XML_SET_NAME)
    {
        p_st->name.len   = 0;
        p_st->name.p     = p_st->p_cur - 1;
        p_st->p_data_bfr = p_st->name.p;
    }
    else if (set == XML_SET_VALUE)
    {
        p_st->value.len  = 0;
        p_st->value.p    = p_st->p_cur - 1;
        p_st->p_data_bfr = p_st->value.p;
    }
    else
    {
        /* XML_SET_CLEAR or anything unknown: stop collecting */
        p_st->p_data_bfr = NULL;
    }
}
/*****************************************************************************
**
** Function xml_write_bfr
**
** Description
** Pushes (copies) the character from p_st->next_token to the buffer, if
** any, that has been set calling xml_set_bfr.
**
** Parameters
** p_st (in/out) : the parser state
**
** Returns
** -
*****************************************************************************/
static void xml_write_bfr(tXML_MUL_STATE *p_st)
{
    /* no string selected: the current character is simply dropped */
    if (!p_st->p_data_bfr)
        return;

    /* Nothing is copied - the strings alias the input data, so "writing"
     * a character just means extending the selected string by one.
     */
    if (p_st->p_data_bfr == p_st->name.p)
        p_st->name.len++;
    else
        p_st->value.len++;
}
/*****************************************************************************
**
** Function xml_incr_pars_res
**
** Description
** Sets the final parsing result if the new_res provided has
** higher rank than the current parsing result.
**
** Parameters
** p_st (in/out) : the parser state
** new_res (in) : the new parsing result
**
** Returns
** -
*****************************************************************************/
static void xml_incr_pars_res(tXML_MUL_STATE *p_st, tXML_RESULT new_res)
{
    /* only a result of strictly higher rank may replace the current one */
    if (new_res <= p_st->pars_res)
        return;

    /* These results are sticky and are never overwritten:
     *   XML_OBJ_ST_EMPTY
     *   XML_NO_MEM  - no last_bfr.p, and the tXML_MUL_STATE is not in init
     *   XML_NO_PROP - run out of tXML_PROP
     */
    if ((p_st->pars_res == XML_OBJ_ST_EMPTY) ||
        (p_st->pars_res == XML_NO_MEM) ||
        (p_st->pars_res == XML_NO_PROP))
    {
        return;
    }

    /* XML_SUCCESS, XML_WARNING, XML_ERR, ... */
    p_st->pars_res = new_res;
}
/*****************************************************************************
**
** Function xml_read_char
**
** Description
*****************************************************************************/
static void xml_read_char(tXML_MUL_STATE *p_st)
{
    /* Reads one character into next_token and sets curr_res to 1, or sets
     * curr_res to 0 when no more input is available. Left-over data in
     * last_bfr is consumed first, then the chunk described by xml_os.
     */
    if (p_st->p_cur && p_st->p_cur >= p_st->last_bfr.p &&
        p_st->p_cur < (p_st->last_bfr.p + p_st->used_last_bfr))
    {
        /* still inside the left-over from the previous parse */
        p_st->next_token = *p_st->p_cur;
        if (p_st->next_token != 0)
        {
            p_st->p_cur++;
            p_st->curr_res = 1;
            return;
        }

        /* a NUL ends the left-over early: discard it and use the new data */
        p_st->p_cur = p_st->xml_os.p_begin;
        p_st->last_bfr.p[0] = 0;
        p_st->used_last_bfr = 0;
    }
    else if (p_st->p_cur == (p_st->last_bfr.p + p_st->used_last_bfr))
    {
        /* just stepped past the end of the left-over: switch to new data */
        p_st->used_last_bfr = 0;
        p_st->p_cur = p_st->xml_os.p_begin;
    }

    /* read from the buffer given to the parse call, if anything is left */
    if (p_st->p_cur && p_st->p_cur < p_st->xml_os.p_end)
    {
        p_st->next_token = *p_st->p_cur;
        p_st->p_cur++;
        p_st->curr_res = 1;
    }
    else
    {
        p_st->curr_res = 0;
    }
}
/*****************************************************************************
**
** Function xml_get_next
**
** Description
** Writes the character in p_st->next_token to the current buffer if set.
** Then the next character is read from the Object Store into
** p_st->next_token. The first time get_next is called, the current
** buffer must be NULL, i.e p_st->data_bfr must be NULL.
**
** xml_get_next handles end-of-line as specified in the xml spec. It
** passes, skips or normalizes (p.29 in XML spec) white spaces (ws)
** as specified in the ws_op param. Note, the ws_op applies when
** getting one (or many characters) from Object Store into the
** p_st->next_token. It does not apply when pushing the (initial)
** p_st->next_token to the current buffer.
**
** The characters are read one by one from the Object Store.
** Presently this is not anticipated to cause any problems
** regarding reading speed. Should it become a problem in the
** future, a new buffer could be introduced into which a chunk
** of characters could be put, using one Object Store read call.
** The get_next function would then get the next character from
** the new buffer.
**
** Parameters
** p_st (in/out) : the parser state
** ws_op (in) : the requested white space handling.
**
** Returns
** True if a character was successfully read into p_st->next_token.
** False otherwise.
*****************************************************************************/
static BOOLEAN xml_get_next(tXML_MUL_STATE *p_st, tXML_WS_OP ws_op)
{
    /* account for the character we are about to replace */
    xml_write_bfr(p_st);

    /* read at least one character; with XML_SKIP_WS keep reading for as
     * long as white space is delivered and the store is not exhausted */
    for (;;)
    {
        xml_read_char(p_st);
        if ((ws_op != XML_SKIP_WS) || !XML_IS_WS(p_st->next_token) ||
            XML_EOS(p_st))
        {
            break;
        }
    }

    /* second half of a CR LF pair: the CR was already delivered as LF on
     * the previous call, so drop this LF and read the character after it */
    if (!XML_EOS(p_st) && p_st->skip_next_nl && (p_st->next_token == 0xA))
    {
        xml_read_char(p_st);
    }
    p_st->skip_next_nl = FALSE;

    if (XML_EOS(p_st))
    {
        p_st->next_token = 0;
        return FALSE;
    }

    /* deliver CR as LF and remember to swallow a directly following LF */
    if (p_st->next_token == 0xD)
    {
        p_st->skip_next_nl = TRUE;
        p_st->next_token = 0xA;
    }

    /* normalisation: line feed and tab both become a plain space */
    if (ws_op == XML_NORM_WS)
    {
        if ((p_st->next_token == 0xA) || (p_st->next_token == 0x9))
            p_st->next_token = 0x20;
    }
    return TRUE;
}
/*****************************************************************************
**
** Function xml_find_ch
**
** Description
** Searches for the character given in ch. It starts searching in
** p_st->next_token and if not found it gets characters from the Object
** Store until ch is in p_st->next_token.
**
** Parameters
** p_st (in/out) : the parser state
** ch (in) : the character to search for
** ws_op (in) : the requested white space handling when getting chars
**
** Returns
** True if the character was found.
** False otherwise.
*****************************************************************************/
static BOOLEAN xml_find_ch(tXML_MUL_STATE *p_st, UINT8 ch, tXML_WS_OP ws_op)
{
    /* the character already in next_token is examined first */
    for (;;)
    {
        if (XML_EOS(p_st) || (p_st->next_token == ch))
            break;
        xml_get_next(p_st, ws_op);
    }

    /* leaving the loop without End Of Store means ch is in next_token */
    return (BOOLEAN) !XML_EOS(p_st);
}
/*****************************************************************************
**
** Function xml_find_ch_n
**
** Description
** Same function as xml_find_ch, except that any character in p_str
** that is found will stop the search.
**
** Parameters
** p_st (in/out) : the parser state
** p_str (in) : the string containing the characters searched for.
** Must not be NULL or an empty string.
**
** Returns
** True if any of the characters in p_str was found.
** False otherwise.
*****************************************************************************/
static BOOLEAN xml_find_ch_n(tXML_MUL_STATE *p_st, const UINT8 *p_str)
{
    int idx;

    /* test the current token against every stop character, then advance */
    for ( ; !XML_EOS(p_st); xml_get_next(p_st, XML_PASS_WS))
    {
        for (idx = 0; p_str[idx] != 0; idx++)
        {
            if (p_str[idx] == p_st->next_token)
                return TRUE;
        }
    }
    return FALSE;
}
/*****************************************************************************
**
** Function xml_find_str
**
** Description
** Searches for p_str (i.e the exact sequence of characters in p_str) in
** the input from Object Store. The function ends with the character
** succeeding p_str in the input, (i.e that char is in p_st->next_token
** upon return) or with XML_EOS.
**
** Parameters
** p_st (in/out) : the parser state
** p_str (in) : the string to search for and pass by.
** Must not be NULL or an empty string.
**
** Returns
** True if the string was found.
** False otherwise.
*****************************************************************************/
static BOOLEAN xml_find_str(tXML_MUL_STATE *p_st, const UINT8 *p_str)
{
    /* Searches the input for the exact character sequence p_str, starting
     * with the character in p_st->next_token. Every character is consumed
     * exactly once with xml_get_next(XML_PASS_WS). On success the character
     * following p_str is in next_token (or the store is exhausted).
     *
     * Returns TRUE if p_str was found, FALSE if End Of Store was reached
     * first. An empty p_str is reported as found without consuming input.
     *
     * The input cannot be re-read, so after a partial match fails the
     * search must not simply restart from the first pattern character: the
     * characters already consumed may themselves begin a new match. For
     * example, looking for "]]>" in "]]]>" must keep two ']' matched when
     * the third ']' arrives. The fallback below therefore keeps the longest
     * prefix of p_str that is also a suffix of what was matched so far.
     */
    UINT16 pat_len = 0;
    UINT16 matched = 0; /* number of leading chars of p_str matched so far */
    UINT16 shorter;
    UINT16 idx;

    while (p_str[pat_len])
        pat_len++;
    if (pat_len == 0)
        return TRUE;

    while (!XML_EOS(p_st))
    {
        while (matched && (p_st->next_token != p_str[matched]))
        {
            /* find the largest shorter < matched for which the first
             * "shorter" pattern chars equal the last "shorter" matched chars;
             * patterns are tiny, so a direct comparison is sufficient */
            for (shorter = (UINT16) (matched - 1); shorter > 0; shorter--)
            {
                for (idx = 0; idx < shorter; idx++)
                {
                    if (p_str[idx] != p_str[matched - shorter + idx])
                        break;
                }
                if (idx == shorter)
                    break;
            }
            matched = shorter;
        }

        if (p_st->next_token == p_str[matched])
            matched++;

        /* consume the token; the result is checked through XML_EOS above */
        xml_get_next(p_st, XML_PASS_WS);

        if (matched == pat_len)
            return TRUE;
    }
    return FALSE;
}
/*****************************************************************************
**
** Function xml_consume_str
**
** Description
** Checks for p_str i.e that the first character from p_str is in
** p_st->next_token and that the successors immediately follows in the
** Object Store. The p_str must not be last in the Object Store.
**
** Parameters
** p_st (in/out) : the parser state
** p_str (in) : the string to check if present next and to pass by
** Must not be NULL.
**
** Returns
** True if the string was found and was not last in the Object Store.
** False otherwise.
*****************************************************************************/
static BOOLEAN xml_consume_str(tXML_MUL_STATE *p_st, const UINT8 *p_str)
{
    const UINT8 *p_expect = p_str;

    for (;;)
    {
        /* the current token must be the next expected character */
        if (*p_expect != p_st->next_token)
            return FALSE;
        p_expect++;

        /* there must always be a character following the one just matched */
        if (!xml_get_next(p_st, XML_PASS_WS))
            return FALSE;

        if (*p_expect == 0)
            return TRUE;
    }
}
/*****************************************************************************
**
** Function xml_resolve_refs
**
** Description
** Resolves predefined entity references (sect. 4.6 in the XML spec)
** and character references (sect 4.1) that may be found in
** AttValue and content. (According to the XML spec it may also
** be in an EntityValue. However EntityValues are in the
** doctypedecl part which is not supported).
**
** The AttValue and content not beginning with a tag, must be
** stored in the p_st->p_bfr_set->value buffer.
**
** Parameters
** p_st (in/out) : the parser state
**
** Returns
** -
*****************************************************************************/
static void xml_resolve_refs(tXML_MUL_STATE *p_st)
{
/* Replaces "&name;" and "&#...;" references inside p_st->value in place.
** The value string aliases the input data (see xml_set_bfr), so resolving
** a reference shortens the data by moving the following bytes forward.
**
** NOTE(review): strchr() stops only at a NUL byte, not at value.len; only
** the ';' position is checked against the value length below. This relies
** on the underlying buffer being NUL terminated - confirm with the callers.
*/
UINT8 *p_srch; /* where next search for & starts */
UINT8 *p_am; /* points to found & */
UINT8 *p_sc; /* points to found ; and succeeding chars */
UINT8 *p_start;
UINT8 *p_tmp;
UINT32 ch_code;
UINT32 tmp_code;
INT8 i;
BOOLEAN resolved;
UINT16 len_left;
p_srch = p_st->value.p;
len_left = p_st->value.len;
do
{
p_start = p_srch;
p_am = (UINT8*) strchr((char*) p_srch, XML_AM);
p_sc = p_am ? (UINT8*) strchr((char*) p_am, XML_SC) : NULL;
/* make sure the ptr does not exceed the end of the value str */
/* NOTE(review): len_left keeps the initial value.len while p_start
** advances on every iteration, so this bound grows looser each round */
if(p_sc > (len_left + p_start))
p_sc = NULL;
if (p_am && p_sc)
{
resolved = FALSE;
p_tmp = p_am + 1;
*p_sc = 0; /* terminate the ref by replacing ; with 0 */
if (*p_tmp == XML_PD) /* character ref */
{
/* "#x41" becomes "0x41" so that a base-0 conversion reads it as hex */
if (p_tmp[1] == XML_HX)
*p_tmp = '0';
else
{
/* decimal: step over '#' and any leading zeros (presumably so that a
** base-0 conversion does not treat the number as octal) */
for(p_tmp++; *p_tmp == '0'; p_tmp++)
{
;
}
}
/* NOTE(review): utl_strtoul is assumed to behave like strtoul() */
ch_code = utl_strtoul((char*) p_tmp, NULL, 0);
/* skip leading zero bytes */
/* afterwards i is the index of the highest non-zero byte, or -1 if
** ch_code is 0 (then nothing is written at all) */
for (i = 3; (i >= 0) && !(ch_code >> i * 8); i--)
{
;
}
/* write the code over the reference, most significant byte first */
p_tmp = p_am;
while (i >= 0)
{
/* mask out one byte and shift it rightmost */
/* preceding bytes must be zero so shift left first */
tmp_code = ch_code << ((3-i) * 8);
*p_tmp = (UINT8) (tmp_code >> 24);
p_tmp++;
i--;
}
/* NOTE(review): unlike the predefined references below, this path does
** not adjust p_st->value.len or p_st->p_cur for the removed characters */
resolved = TRUE;
}
else if (p_tmp < p_sc) /* check if predefined ref */
{
/* On a match the '&' is overwritten with the resolved character and
** value.len and p_cur are reduced by the number of characters that
** the copy loop further down removes from the data. */
resolved = TRUE;
if (strcmp((char*) p_tmp, XML_LT_STR) == 0)
{
*p_am = XML_ST;
p_st->value.len = p_st->value.len - 3; /* remove the length for lt; */
p_st->p_cur = p_st->p_cur - 3;
}
else if (strcmp((char*) p_tmp, XML_GT_STR) == 0)
{
*p_am = XML_GT;
p_st->value.len = p_st->value.len - 3; /* remove the length for gt; */
p_st->p_cur = p_st->p_cur - 3;
}
else if (strcmp((char*) p_tmp, XML_AMP_STR) == 0)
{
*p_am = XML_AM;
p_st->value.len = p_st->value.len - 4; /* remove the length for amp; */
p_st->p_cur = p_st->p_cur - 4;
}
else if (strcmp((char*) p_tmp, XML_APOS_STR) == 0)
{
*p_am = XML_SQ;
p_st->value.len = p_st->value.len - 5; /* remove the length for apos; */
p_st->p_cur = p_st->p_cur - 5;
}
else if (strcmp((char*) p_tmp, XML_QUOT_STR) == 0)
{
*p_am = XML_DQ;
p_st->value.len = p_st->value.len - 5; /* remove the length for quot; */
p_st->p_cur = p_st->p_cur - 5;
}
else
resolved = FALSE;
}
if (resolved)
{
p_srch = p_tmp; /* will contain char after ; */
p_sc++;
/* close the gap: move everything after the ';' forward, up to (but not
** including) the next NUL byte in the data */
while(*p_sc)
{
*p_tmp++ = *p_sc++;
}
}
else
{
*p_sc = XML_SC; /* restore the ref end */
p_srch = p_sc + 1;
}
} /* end if */
} while (*p_srch && p_am && p_sc);
}
/*****************************************************************************
**
** Function xml_remove_trail_ws
**
** Description
** Removes trailing white space from the p_st->value string by shortening
** its length; the underlying bytes are left untouched.
**
** Parameters
** p_st (in/out) : the parser state
**
** Returns
** -
*****************************************************************************/
static void xml_remove_trail_ws(tXML_MUL_STATE *p_st)
{
    UINT16 keep;

    if (!p_st->value.p)
        return;

    /* walk back from the end until a non white space character is found;
     * only the length changes, the data itself is left untouched */
    for (keep = p_st->value.len; keep != 0; keep--)
    {
        if (!XML_IS_WS(p_st->value.p[keep - 1]))
            break;
    }
    p_st->value.len = keep;
}
/*****************************************************************************
** Parsing Static Functions
*****************************************************************************/
/*****************************************************************************
**
** Function xml_name
**
** Description
** Parses a name and its prefix if any. The prefix and name buffers
** are set.
** The functions ends with either white space,
** XML_EQ, XML_EM or XML_GT in p_st->next_token or with XML_EOS.
**
** Parameters
** p_st (in/out) : the parser state
**
** Returns
** True if no error was found.
** False otherwise.
*****************************************************************************/
static BOOLEAN xml_name(tXML_MUL_STATE *p_st)
{
    BOOLEAN has_prefix = FALSE;
    BOOLEAN name_ok;

    p_st->prefix.len = 0;
    p_st->prefix.p = NULL;

    /* collect characters up to the first name delimiter */
    xml_set_bfr(p_st, XML_SET_NAME);
    xml_find_ch_n(p_st, xml_name_srch);

    if (!XML_EOS(p_st) && (p_st->next_token == XML_CO))
    {
        /* what was collected so far is the prefix (if it is not empty) */
        if (p_st->name.len)
        {
            p_st->prefix.len = p_st->name.len;
            p_st->prefix.p = p_st->name.p;
            has_prefix = TRUE;
        }

        /* step over ':' and restart the name at the following character;
         * ':' itself is no longer a delimiter for the local part */
        xml_get_next(p_st, XML_PASS_WS);
        xml_set_bfr(p_st, XML_SET_NAME);
        if (!XML_EOS(p_st))
        {
            xml_find_ch_n(p_st, &xml_name_srch[1]);
        }
    }

    /* a prefix alone or a non-empty name counts as success */
    name_ok = (BOOLEAN) (has_prefix || p_st->name.len);
    if (name_ok)
        xml_set_bfr(p_st, XML_SET_CLEAR);
    return name_ok;
}
/*****************************************************************************
**
** Function xml_attributes
**
** Description
** Parses an attribute list.
** The functions ends with the XML_GT or XML_EM char or XML_EOS.
** Error is reported if the attribute list is last in the Object
** Store.
** Sends a XML_ATTRIBUTE event in the user callback for each
** attribute found.
**
** Parameters
** p_st (in/out) : the parser state
**
** Returns
** True if no error was found.
** False otherwise.
*****************************************************************************/
static BOOLEAN xml_attributes(tXML_MUL_STATE *p_st)
{
    BOOLEAN cb_ret;
    UINT8 quote;

    XML_TRACE_DEBUG1("[xml_parse] xml_attributes: res= %d", p_st->pars_res);

    /* one attribute (name = "value") per iteration; the only successful
     * exit is finding '>' or '/' where an attribute name could start */
    for (;;)
    {
        /* leading white space: read on to the next non-ws character */
        if (XML_IS_WS(p_st->next_token) && !xml_get_next(p_st, XML_SKIP_WS))
            return FALSE;

        if (p_st->next_token == XML_EQ)
            return FALSE; /* '=' without a name */
        if ((p_st->next_token == XML_GT) || (p_st->next_token == XML_EM))
            return TRUE; /* end of the attribute list */

        if (!xml_name(p_st) || XML_EOS(p_st))
            return FALSE;

        /* optional white space, then a mandatory '=' */
        if (XML_IS_WS(p_st->next_token) && !xml_get_next(p_st, XML_SKIP_WS))
            return FALSE;
        if (p_st->next_token != XML_EQ)
            return FALSE;

        /* opening quote: either ' or ", the same one must close the value */
        if (!xml_get_next(p_st, XML_SKIP_WS))
            return FALSE;
        quote = p_st->next_token;
        if ((quote != XML_SQ) && (quote != XML_DQ))
            return FALSE;

        /* collect the value up to the matching quote, normalising ws */
        if (!xml_get_next(p_st, XML_PASS_WS))
            return FALSE;
        xml_set_bfr(p_st, XML_SET_VALUE);
        if (!xml_find_ch(p_st, quote, XML_NORM_WS))
            return FALSE;
        xml_set_bfr(p_st, XML_SET_CLEAR);
        xml_resolve_refs(p_st);

        /* report the attribute to the user */
        p_st->event_data.attr.prefix.len = p_st->prefix.len;
        p_st->event_data.attr.prefix.p = p_st->prefix.p;
        p_st->event_data.attr.name.len = p_st->name.len;
        p_st->event_data.attr.name.p = p_st->name.p;
        p_st->event_data.attr.value.len = p_st->value.len;
        p_st->event_data.attr.value.p = p_st->value.p;
        p_st->value.len = 0;
        cb_ret = p_st->cback(XML_ATTRIBUTE, &(p_st->event_data), p_st->p_usr_data);
        if (cb_ret == FALSE)
        {
            /* the user could not take the attribute */
            xml_incr_pars_res(p_st, XML_NO_PROP);
            return FALSE;
        }

        /* step over the closing quote; the list must not end the store */
        if (!xml_get_next(p_st, XML_PASS_WS))
            return FALSE;
    }
}
/*****************************************************************************
**
** Function xml_elems
**
** Description
** Parses all elements with all their content.This function is not a
** one-to-one mapped implementation of one production from the XML spec.
** Instead it uses a simplified iterative (as opposed to recursive)
** approach when parsing both the element and content productions.
**
** When a parsing error is found, this function tries to recover by
** searching for the next element (tag).
**
** When char data is found, the function sends the XML_CHARDATA event in
** the user callback.
**
** Other static functions with production names, start their parsing
** from the first character in their production. They might check
** that the first character (token) in the production matches
** p_st->next_token, alternatively they can just get rid of the first
** char in the production by calling get_next_ch. The exceptions to this
** are the xml_qm_elem, xml_ex_elem, xml_etag_elem and the xml_tag_elem
** functions which starts with the XML_QM, XML_EX, XML_EM and the first
** char in the tag name, respectively.
**
** Parameters
** p_st (in/out) : the parser state
** prev_ok (in) : if parsing done before calling this function was
** ok. If not, the functions starts with recovering.
**
** Returns
** True if parsing was successful possibly with successful recoveries.
** False if an error was found from which recovery failed (XML_EOS).
*****************************************************************************/
static BOOLEAN xml_elems(tXML_MUL_STATE *p_st, BOOLEAN prev_ok)
{
    BOOLEAN at_tag;
    BOOLEAN cb_ok = TRUE;

    /* One iteration handles optional char data followed by one "<..." item. */
    while (!XML_EOS(p_st) && prev_ok)
    {
        /* Leading white space is dropped, even in front of char data. */
        if (XML_IS_WS(p_st->next_token))
        {
            if (!xml_get_next(p_st, XML_SKIP_WS))
            {
                /* Nothing but white space was left: not an error. */
                return TRUE;
            }
        }

        at_tag = (BOOLEAN) (p_st->next_token == XML_ST);
        if (at_tag)
        {
            /* Already on XML_ST: record p_cur - 1 as the last statement
            ** position and notify the user with XML_TOP (return ignored). */
            p_st->p_last_stm = p_st->p_cur - 1;
            if (p_st->p_cur)
            {
                p_st->p_copy = p_st->p_last_stm;
            }
            p_st->cback(XML_TOP, &(p_st->event_data), p_st->p_usr_data);
        }
        else
        {
            /* Char data: collect into the value buffer up to the next XML_ST. */
            xml_set_bfr(p_st, XML_SET_VALUE);
            at_tag = xml_find_ch(p_st, XML_ST, XML_PASS_WS);
            xml_remove_trail_ws(p_st);

            if (p_st->value.len > 0)
            {
                xml_resolve_refs(p_st);

                /* Hand the collected value to the user and reset its length
                ** before the callback runs. */
                p_st->event_data.ch_data.value.p = p_st->value.p;
                p_st->event_data.ch_data.value.len = p_st->value.len;
                p_st->event_data.ch_data.last = TRUE;
                p_st->value.len = 0;

                cb_ok = p_st->cback(XML_CHARDATA, &(p_st->event_data), p_st->p_usr_data);
                if (cb_ok == FALSE)
                {
                    /* The user callback rejected the char data. */
                    xml_incr_pars_res(p_st, XML_NO_PROP);
                    return FALSE;
                }
            }

            xml_set_bfr(p_st, XML_SET_CLEAR);

            if (!at_tag)
            {
                /* No further XML_ST was found. */
                return prev_ok;
            }
        }

        /* Sitting on XML_ST: fetch the char after it and dispatch on it. */
        if (!xml_get_next(p_st, XML_PASS_WS))
        {
            return FALSE;
        }

        if (p_st->next_token == XML_QM)
        {
            prev_ok = xml_qm_elem(p_st);
        }
        else if (p_st->next_token == XML_EX)
        {
            prev_ok = xml_ex_elem(p_st, FALSE);
        }
        else if (p_st->next_token == XML_EM)
        {
            prev_ok = xml_etag_elem(p_st);
        }
        else
        {
            prev_ok = xml_tag_elem(p_st);
        }

        if (!prev_ok)
        {
            xml_incr_pars_res(p_st, XML_ERR);
        }
    }

    XML_TRACE_DEBUG1("xml_elems prev_ok:%d", prev_ok);
    return prev_ok;
}
/*****************************************************************************
**
** Function xml_qm_elem
**
** Description
** Recognises all productions starting with "<?". That is PI and XML decl.
** These productions are skipped and XML_WARNING is set.
** The function starts with the XML_QM as the first char (is in
** p_st->next_token). It ends with the XML_GT successor (is in
** p_st->next_token) or XML_EOS.
**
** Parameters
** p_st (in/out) : the parser state
**
** Returns
** True if no error was found trying to recognise the start and end of
** the productions. False otherwise.
*****************************************************************************/
static BOOLEAN xml_qm_elem(tXML_MUL_STATE *p_st)
{
    /* Advance past the current char, then search for XML_QTAG_END_STR.
    ** Short-circuit evaluation keeps the search from running when the
    ** advance already failed. */
    if (!xml_get_next(p_st, XML_PASS_WS) ||
        !xml_find_str(p_st, (UINT8*) XML_QTAG_END_STR))
    {
        return FALSE;
    }

    /* The production was skipped rather than interpreted: flag a warning. */
    xml_incr_pars_res(p_st, XML_WARNING);
    return TRUE;
}
/*****************************************************************************
**
** Function xml_ex_elem
**
** Description
** Handles all productions starting with "<!". They are Comments, CDSect
** doctypedecl and markupdecl. All are skipped. However, the inpar
** prolog must be set for the function to try to detect the doctypedecl
** and markupdecl beginning.
**
** The function starts with the XML_EX as the first char.
** The function ends with XML_EOS or the char succeeding XML_GT,
** except for doctypedecl and markupdecl, which end at the next XML_ST.
**
** Parameters
** p_st (in/out) : the parser state
** prolog (in) : should be set if in prolog in which case the function
** tries to detect (allows) the beginning of doctypedecl
** and markupdecl.
** Returns
** True if no error was found trying to recognise the start and end of
** the productions. False otherwise.
*****************************************************************************/
static BOOLEAN xml_ex_elem(tXML_MUL_STATE *p_st, BOOLEAN prolog)
{
    UINT8 quote;

    /* Advance to the char that tells the "<!" productions apart. */
    if (!xml_get_next(p_st, XML_PASS_WS))
    {
        return FALSE;
    }

    if (p_st->next_token == XML_HY)
    {
        /* Comment: consume the opening string, then skip to the closing one. */
        if (!xml_consume_str(p_st, (UINT8*) XML_COMM_STR))
        {
            return FALSE;
        }
        if (!xml_find_str(p_st, (UINT8*) XML_COMM_END_STR))
        {
            return FALSE;
        }
        return TRUE;
    }

    if (p_st->next_token == XML_LB)
    {
        /* CDSect: skipped like a comment, but reported with a warning. */
        if (!xml_consume_str(p_st, (UINT8*) XML_CDS_STR))
        {
            return FALSE;
        }
        if (!xml_find_str(p_st, (UINT8*) XML_CDS_END_STR))
        {
            return FALSE;
        }
        xml_incr_pars_res(p_st, XML_WARNING);
        return TRUE;
    }

    if (!prolog)
    {
        /* Anything else is only accepted while in the prolog. */
        return FALSE;
    }

    /* doctypedecl or markupdecl: keep scanning until the next XML_ST or the
    ** end of the stream, stepping over quoted sections on the way. */
    do
    {
        if (!xml_find_ch_n(p_st, (UINT8*) XML_DOCT_STR))
        {
            return FALSE;
        }

        if ((p_st->next_token == XML_SQ) || (p_st->next_token == XML_DQ))
        {
            /* Remember which quote opened the section and find its match. */
            quote = p_st->next_token;
            if (!xml_get_next(p_st, XML_PASS_WS))
            {
                return FALSE;
            }
            if (!xml_find_ch(p_st, quote, XML_PASS_WS))
            {
                return FALSE;
            }
            /* Result deliberately unused: the loop condition checks XML_EOS. */
            xml_get_next(p_st, XML_PASS_WS);
        }
    } while (!XML_EOS(p_st) && (p_st->next_token != XML_ST));

    xml_incr_pars_res(p_st, XML_WARNING);
    return TRUE;
}
/*****************************************************************************
**
** Function xml_tag_elem
**
** Description
** Parses a tag element. The function starts with the char succeeding the
** XML_ST char.
** The function ends with the char succeeding the XML_GT char or
** with XML_EOS.
** Sends the XML_TAG and the XML_TAG_END events in a callback each.
**
** Parameters
** p_st (in/out) : the parser state
**
** Returns
** True if no error was found.
** False otherwise.
*****************************************************************************/
static BOOLEAN xml_tag_elem(tXML_MUL_STATE *p_st)
{
    BOOLEAN cb_ok = TRUE;

    if (!xml_name(p_st))
    {
        return FALSE;
    }

    /* Publish the parsed prefix/name and the last statement position
    ** through the XML_TAG event. */
    p_st->event_data.tag.prefix.p = p_st->prefix.p;
    p_st->event_data.tag.prefix.len = p_st->prefix.len;
    p_st->event_data.tag.name.p = p_st->name.p;
    p_st->event_data.tag.name.len = p_st->name.len;
    p_st->event_data.tag.p_last_stm = p_st->p_last_stm;

    cb_ok = p_st->cback(XML_TAG, &(p_st->event_data), p_st->p_usr_data);
    if (cb_ok == FALSE)
    {
        /* The user callback rejected the tag. */
        xml_incr_pars_res(p_st, XML_NO_PROP);
        return FALSE;
    }

    /* A tag cannot end at the end of the stream. */
    if (XML_EOS(p_st))
    {
        return FALSE;
    }

    /* White space after the name leads into the attribute list. */
    if (XML_IS_WS(p_st->next_token))
    {
        if (!xml_attributes(p_st))
        {
            return FALSE;
        }
    }

    /* An XML_EM at this point marks an empty element; step over it. */
    p_st->event_data.empty_elem.end = (BOOLEAN) (p_st->next_token == XML_EM);
    if (p_st->event_data.empty_elem.end)
    {
        if (!xml_get_next(p_st, XML_PASS_WS))
        {
            return FALSE;
        }
    }

    /* The tag has to be closed by XML_GT. */
    if (p_st->next_token != XML_GT)
    {
        return FALSE;
    }

    /* Move to the char after XML_GT; the result is not checked here. */
    xml_get_next(p_st, XML_PASS_WS);

    cb_ok = p_st->cback(XML_TAG_END, &(p_st->event_data), p_st->p_usr_data);
    if (cb_ok == FALSE)
    {
        xml_incr_pars_res(p_st, XML_NO_PROP);
        return FALSE;
    }

    /* Update the copy position and send XML_TOP (return value ignored). */
    p_st->p_copy = p_st->p_cur - 1;
    p_st->cback(XML_TOP, &(p_st->event_data), p_st->p_usr_data);

    return TRUE;
}
/*****************************************************************************
**
** Function xml_etag_elem
**
** Description
** Parses an end tag element. The function starts with the XML_EM char.
** The function ends with the char succeeding the XML_GT char or
** with XML_EOS. Sends the XML_ETAG event in the user callback.
**
** Parameters
** p_st (in/out) : the parser state
**
** Returns
** True if no error was found.
** False otherwise.
*****************************************************************************/
static BOOLEAN xml_etag_elem(tXML_MUL_STATE *p_st)
{
    BOOLEAN cb_ok = TRUE;

    /* Step over the XML_EM char, then parse the end tag's name. */
    if (!xml_get_next(p_st, XML_PASS_WS))
    {
        return FALSE;
    }
    if (!xml_name(p_st))
    {
        return FALSE;
    }

    /* Publish the parsed prefix/name through the XML_ETAG event. */
    p_st->event_data.etag.prefix.p = p_st->prefix.p;
    p_st->event_data.etag.prefix.len = p_st->prefix.len;
    p_st->event_data.etag.name.p = p_st->name.p;
    p_st->event_data.etag.name.len = p_st->name.len;

    cb_ok = p_st->cback(XML_ETAG, &(p_st->event_data), p_st->p_usr_data);
    if (cb_ok == FALSE)
    {
        /* The user callback rejected the end tag. */
        xml_incr_pars_res(p_st, XML_NO_PROP);
        return FALSE;
    }

    /* The copy position goes two chars before the prefix when there is one,
    ** otherwise two chars before the name. XML_TOP is then sent with its
    ** return value ignored. */
    if (p_st->prefix.p)
    {
        p_st->p_copy = p_st->prefix.p - 2;
    }
    else
    {
        p_st->p_copy = p_st->name.p - 2;
    }
    p_st->cback(XML_TOP, &(p_st->event_data), p_st->p_usr_data);

    /* The closing XML_GT must still follow, optionally after white space. */
    if (XML_EOS(p_st))
    {
        return FALSE;
    }
    if (XML_IS_WS(p_st->next_token))
    {
        if (!xml_get_next(p_st, XML_SKIP_WS))
        {
            return FALSE;
        }
    }
    if (p_st->next_token != XML_GT)
    {
        return FALSE;
    }

    /* Move to the char after XML_GT; the result is not checked here. */
    xml_get_next(p_st, XML_PASS_WS);
    return TRUE;
}