* Copyright 2016-2023, Cypress Semiconductor Corporation (an Infineon company) or
* an affiliate of Cypress Semiconductor Corporation. All rights reserved.
* This software, including source code, documentation and related
* materials ("Software") is owned by Cypress Semiconductor Corporation
* or one of its affiliates ("Cypress") and is protected by and subject to
* worldwide patent protection (United States and foreign),
* United States copyright laws and international treaty provisions.
* Therefore, you may use this Software only as provided in the license
* agreement accompanying the software package from which you
* obtained this Software ("EULA").
* If no EULA applies, Cypress hereby grants you a personal, non-exclusive,
* non-transferable license to copy, modify, and compile the Software
* source code solely for use in connection with Cypress's
* integrated circuit products. Any reproduction, modification, translation,
* compilation, or representation of this Software except as specified
* above is prohibited without the express written permission of Cypress.
* reserves the right to make changes to the Software without notice. Cypress
* does not assume any liability arising out of the application or use of the
* Software or any product or circuit described in the Software. Cypress does
* not authorize its products for use in any products where a malfunction or
* failure of the Cypress product may reasonably be expected to result in
* significant property damage, injury or death ("High Risk Product"). By
* including Cypress's product in a High Risk Product, the manufacturer
* of such system or application assumes all risk of such use and in doing
* so agrees to indemnify Cypress against all liability.
/*
** Name: xml_parse.c
** File: XML Parser
*/
/* The XML Parser is dependent on the Object Store. At present
** the object store resides in GOEP and hence the parser is
** dependent on GOEP. The parser only uses the Object Store
** in GOEP, so if the Object Store is separated from GOEP in the
** future, the parser will not be dependent on GOEP.
*/
#include <stdlib.h>
#include <string.h>
#include "xml_pars_api.h"
#include "wiced_bt_utils.h"
/*
** Debug trace helpers for the XML parser.
** When BIP_TRACE_XML is defined and equals TRUE, every XML_TRACE_DEBUGn
** forwards to the matching BT_TRACE_n on the GOEP layer; otherwise all of
** them expand to nothing so the trace calls cost nothing in normal builds.
** The conditional is closed with #else/#endif so that each macro is defined
** exactly once whichever branch is taken.
*/
#if (defined(BIP_TRACE_XML) && BIP_TRACE_XML == TRUE)
#define XML_TRACE_DEBUG0(m) {BT_TRACE_0(TRACE_LAYER_GOEP, TRACE_TYPE_DEBUG, m);}
#define XML_TRACE_DEBUG1(m,p1) {BT_TRACE_1(TRACE_LAYER_GOEP, TRACE_TYPE_DEBUG, m,p1);}
#define XML_TRACE_DEBUG2(m,p1,p2) {BT_TRACE_2(TRACE_LAYER_GOEP, TRACE_TYPE_DEBUG, m,p1,p2);}
#define XML_TRACE_DEBUG3(m,p1,p2,p3) {BT_TRACE_3(TRACE_LAYER_GOEP, TRACE_TYPE_DEBUG, m,p1,p2,p3);}
#define XML_TRACE_DEBUG4(m,p1,p2,p3,p4) {BT_TRACE_4(TRACE_LAYER_GOEP, TRACE_TYPE_DEBUG, m,p1,p2,p3,p4);}
#define XML_TRACE_DEBUG5(m,p1,p2,p3,p4,p5) {BT_TRACE_5(TRACE_LAYER_GOEP, TRACE_TYPE_DEBUG, m,p1,p2,p3,p4,p5);}
#define XML_TRACE_DEBUG6(m,p1,p2,p3,p4,p5,p6) {BT_TRACE_6(TRACE_LAYER_GOEP, TRACE_TYPE_DEBUG, m,p1,p2,p3,p4,p5,p6);}
#else
#define XML_TRACE_DEBUG0(m)
#define XML_TRACE_DEBUG1(m,p1)
#define XML_TRACE_DEBUG2(m,p1,p2)
#define XML_TRACE_DEBUG3(m,p1,p2,p3)
#define XML_TRACE_DEBUG4(m,p1,p2,p3,p4)
#define XML_TRACE_DEBUG5(m,p1,p2,p3,p4,p5)
#define XML_TRACE_DEBUG6(m,p1,p2,p3,p4,p5,p6)
#endif
/* Constants */
/* Single-character markup tokens recognised by the parser. */
enum
{
    XML_ST = '<',   /* start of a tag */
    XML_GT = '>',   /* end of a tag */
    XML_QM = '?',   /* "<?" constructs */
    XML_EX = '!',   /* "<!" constructs */
    XML_EM = '/',   /* End Mark */
    XML_CO = ':',   /* prefix / name separator */
    XML_EQ = '=',   /* attribute name / value separator */
    XML_SQ = '\'',  /* single quote */
    XML_DQ = '"',   /* double quote */
    XML_AM = '&',   /* start of a reference */
    XML_SC = ';',   /* end of a reference */
    XML_PD = '#',   /* character reference marker */
    XML_HX = 'x',   /* hexadecimal character reference marker */
    XML_HY = '-',   /* first character of a comment */
    XML_LB = '['    /* first character of a CDATA section */
};

/* Names of the predefined entity references. */
#define XML_LT_STR       "lt"
#define XML_GT_STR       "gt"
#define XML_AMP_STR      "amp"
#define XML_APOS_STR     "apos"
#define XML_QUOT_STR     "quot"

/* Delimiter strings searched for or consumed while skipping constructs. */
#define XML_QTAG_END_STR "?>"
#define XML_COMM_STR     "--"
#define XML_COMM_END_STR "-->"
#define XML_CDS_STR      "[CDATA["
#define XML_CDS_END_STR  "]]>"

/* Character set that stops the scan inside a "<!" prolog declaration. */
#define XML_DOCT_STR     "<'\""
/* Characters that stop a name scan. ':' comes first so that xml_name() can
** pass xml_name_srch + 1 to scan for the same set without the prefix
** separator once a prefix has been seen. */
static const UINT8 xml_name_srch[] = ":=/> \t\n\r";
/* Type Definitions */
/* White-space handling mode passed to xml_get_next() and xml_find_ch().
** The values used in this file are XML_PASS_WS, XML_SKIP_WS and XML_NORM_WS;
** they are not defined in this part of the file. */
typedef UINT16 tXML_WS_OP;
/* Globals */
/* The global below is used as the buffer set (tXML_BFR_SET) in a local
** variable (of type tXML_MUL_STATE) in XML_Parse. The buffer set memory is
** separated from the rest of tXML_MUL_STATE to make it easy to change the
** allocation of its memory if found necessary. See xml_alloc_bfr_set.
*/
/* Macro Functions */
/* End Of Store: true when the last read left no current character
** (curr_res is zero or negative). */
#define XML_EOS(p_st) (!((p_st)->curr_res > 0))

/* White space test: space, tab, carriage return, line feed and also NUL. */
#define XML_IS_WS(c) (((c) == ' ')  || ((c) == '\t') || \
                      ((c) == '\r') || ((c) == '\n') || \
                      ((c) == '\0'))
/* Function Prototypes */
/* character-level helpers */
static BOOLEAN xml_get_next(tXML_MUL_STATE *p_st, tXML_WS_OP ws_op);
static BOOLEAN xml_find_ch(tXML_MUL_STATE *p_st, UINT8 ch, tXML_WS_OP ws_op);
static void    xml_incr_pars_res(tXML_MUL_STATE *p_st, tXML_RESULT new_res);
static void    xml_set_bfr(tXML_MUL_STATE *p_st, UINT8 set);

/* parsing static functions */
static BOOLEAN xml_elems(tXML_MUL_STATE *p_st, BOOLEAN prev_ok);
static BOOLEAN xml_qm_elem(tXML_MUL_STATE *p_st);
static BOOLEAN xml_ex_elem(tXML_MUL_STATE *p_st, BOOLEAN prolog);
static BOOLEAN xml_tag_elem(tXML_MUL_STATE *p_st);
static BOOLEAN xml_etag_elem(tXML_MUL_STATE *p_st);

/* selectors accepted by xml_set_bfr() */
#define XML_SET_CLEAR   0
#define XML_SET_NAME    1
#define XML_SET_VALUE   2
/* API Functions */
/*****************************************************************************
**
** Function     XML_InitPars
**
** Description  Resets a parser state and registers the user callback.
**              The whole state is cleared, so every pointer and length
**              member starts out as zero. XML_MulParse() begins a new
**              document only when status is XML_STS_INIT, so that member
**              is set explicitly rather than relying on the constant
**              being zero.
**
** Parameters   p_st       (out) : the parser state to initialise
**              xml_cback  (in)  : callback invoked for every XML event
**              p_usr_data (in)  : opaque pointer handed back to xml_cback
**
** Returns      -
**
*****************************************************************************/
void XML_InitPars(tXML_MUL_STATE *p_st, tXML_CBACK xml_cback, void *p_usr_data)
{
    memset(p_st, 0, sizeof(tXML_MUL_STATE));

    p_st->cback      = xml_cback;
    p_st->p_usr_data = p_usr_data;
    p_st->status     = XML_STS_INIT;

    /* Everything else (p_data_bfr, next_token, curr_res, pars_res,
    ** skip_next_nl and the prefix/name/value strings) is left at the
    ** zero value written by memset() above. */
}
/*
** Function XML_MulParse
** Description
** The current implementation of the xml_pars_api supports only those
** XML constructs needed in BPP SOAP messages. The parser must have a
** small footprint and is therefore small and simple.
** According to SOAP a message must not contain the doctypedecl construct
** (production) and it must not contain Processing Instructions (PI
** production), i.e. these constructs are not supported. In addition,
** CDATA sections, any external or internal entities and the XML
** Declaration are not supported (not used in BPP). Should any of these
** be included in a message being parsed, they will be reported by returning
** a warning code. The parser will then try to find the next tag.
** When the parser reports an XML event using the callback it will always
** continue, even if the callback returns false. All strings in event
** data passed with the callback are limited to 64 bytes in size, except
** the prefix string which has 32 as max size. Consecutive XML_CHARDATA
** events are not supported. Leading and trailing white space is removed
** from the value string before sending the XML_CHARDATA event.
** This function and also all other helping static parsing functions use
** more than one return statement in a function. The reason is to exit
** as soon as possible once a parse error has been found.
** If only one return had been used in each function, the path
** representing a correct xml syntax would have been expressed with very
** deeply nested if-statements.
** Parameters
** see h-file
** Returns
** see h-file
*/
/* Parses one chunk (p_os) of a possibly multi-part XML document, keeping
** the parser state in p_st between calls. Events are reported through
** p_st->cback. Returns p_st->pars_res, or XML_NO_MEM from the early return
** below.
** NOTE(review): in this copy of the file the block delimiters (braces,
** do/else keywords, break statements) and the declaration of `res` are not
** present, so the nesting of the statements below cannot be read off the
** text - confirm against the original source before changing any logic. */
tXML_RESULT XML_MulParse(tXML_MUL_STATE *p_st, tXML_OS *p_os)
BOOLEAN found;
BOOLEAN query, partial = FALSE;
BOOLEAN parse_ok = TRUE;
int keep_size;
int buffer_used;
tXML_RESULT old_pars_res;
/* per-call reset: take a copy of the caller's store descriptor, remember
** the result of the previous call and clear the name/value bookkeeping */
p_st->curr_res = 1; /* not EOS */
memcpy(&p_st->xml_os, p_os, sizeof(tXML_OS));
old_pars_res = p_st->pars_res;
p_st->pars_res = XML_SUCCESS;
p_st->prefix.len = 0;
p_st->name.len = 0;
p_st->value.len = 0;
p_st->p_last_stm = 0;
p_st->p_copy = 0;
XML_TRACE_DEBUG4("XML_MulParse status:%d, pars_res: %d, begin:%d, end:%d",
p_st->status, old_pars_res, p_os->p_begin, p_os->p_end);
/* this do-while(0) loop is to avoid too many return statements in this routine.
* it's easier to "cleanup" with only one return statement */
/* first call for a document: start reading at the beginning of the store */
if (p_st->status == XML_STS_INIT)
p_st->p_cur = p_os->p_begin;
XML_TRACE_DEBUG1("p_cur:%d", p_st->p_cur);
if (!xml_get_next(p_st, XML_PASS_WS)) /* obj store empty or err */
found = FALSE;
while (!XML_EOS(p_st) && !found)
{ /* skip all but top element */
if (!xml_find_ch(p_st, XML_ST, XML_PASS_WS) ||
!xml_get_next(p_st, XML_PASS_WS))
res = XML_ERR;
/* "<?" and "<!" constructs are skipped by their handlers; an end tag
** before any start tag is treated as an error */
if (p_st->next_token == XML_QM)
parse_ok = xml_qm_elem(p_st);
else if (p_st->next_token == XML_EX)
parse_ok = xml_ex_elem(p_st, TRUE);
else if (p_st->next_token == XML_EM)
parse_ok = FALSE;
if (!xml_get_next(p_st, XML_PASS_WS))
res = XML_ERR;
found = TRUE;
parse_ok = TRUE;
if (!parse_ok)
xml_incr_pars_res(p_st, XML_ERR);
} while (0);
p_st->status = XML_STS_1STM;
/* continuation call after the previous call ended with XML_NO_PROP */
else if (old_pars_res == XML_NO_PROP)
XML_TRACE_DEBUG2("p_st->last_bfr.p:%d, p_st->used_last_bfr:%d",
p_st->last_bfr.p, p_st->used_last_bfr);
/* if there was some data left, read it here. */
if (p_st->partial_st.used_last_bfr )
memcpy(p_st->last_bfr.p, p_st->partial_st.last_bfr.p, p_st->partial_st.used_last_bfr);
p_st->used_last_bfr = p_st->partial_st.used_last_bfr;
p_st->last_bfr.p[p_st->partial_st.used_last_bfr] = 0;
p_st->event_data.part.parse = p_st->partial_st.event_data.part.parse;
/* set length to 0 */
p_st->partial_st.used_last_bfr = 0;
XML_TRACE_DEBUG1("retrieved PARTIAL data = [%s]\n", p_st->last_bfr.p);
p_st->p_cur = p_st->last_bfr.p;
/* continuation packet */
/* read a ch, setup xml_set_bfr */
xml_get_next(p_st, XML_PASS_WS);
/* tell the user where the new store begins and which leftover bytes
** are being replayed */
p_st->event_data.copy.p_begin = p_st->xml_os.p_begin;
p_st->event_data.copy.last.p = p_st->last_bfr.p;
p_st->event_data.copy.last.len = p_st->used_last_bfr;
p_st->cback(XML_COPY, &(p_st->event_data), p_st->p_usr_data);
/* nothing left over: read straight from the new store */
if (p_st->used_last_bfr == 0)
p_st->p_cur = p_os->p_begin;
xml_get_next(p_st, XML_PASS_WS);
return XML_NO_MEM;
XML_TRACE_DEBUG1("p_st->p_cur:%d", p_st->p_cur);
XML_TRACE_DEBUG0("XML_MulParse end while");
if (res == XML_SUCCESS)
/* here we found "<(a-z)" */
if (!XML_EOS(p_st))
if (p_st->status == XML_STS_1STM)
/* remeber the beginning position right after '<' in the first line */
/* if the first line can't be parsed at first round, save it to the second parse */
p_st->p_copy = p_st->p_cur - 1;
parse_ok = xml_tag_elem(p_st);
/* parsed the first line */
XML_TRACE_DEBUG0("XML_MulParse exit xml_tag_elem");
if (!parse_ok)
query = p_st->cback(XML_QUERY, &(p_st->event_data), p_st->p_usr_data);
/* if first line parsing is not completed while reach the end of stack, ERROR occurs */
if (query == TRUE)
xml_incr_pars_res(p_st, XML_ERR);
else /* first line parsing to be continued, copy partial data at later point*/
partial = TRUE;
else /* first line is parsed ok, change parsing status */
p_st->status = XML_STS_1TAG;
/* parse the remaining elements of this chunk */
if (!XML_EOS(p_st) && parse_ok)
parse_ok = xml_elems(p_st, parse_ok);
query = p_st->cback(XML_QUERY, &(p_st->event_data), p_st->p_usr_data);
if (parse_ok == FALSE || query == FALSE)
partial = TRUE;
p_st->status = XML_STS_DONE;
/* copy partial data if any */
if (partial)
/* after XML_NO_PROP the parse restarts from the saved copy position */
if(p_st->pars_res == XML_NO_PROP)
p_st->p_cur = p_st->p_copy;
p_st->event_data.part.parse = p_st->pars_res;
p_st->event_data.part.p_keep = p_st->p_cur;
XML_TRACE_DEBUG1("p_st->p_cur:%d (last_stm)", p_st->p_cur);
p_st->cback(XML_PARTIAL, &(p_st->event_data), p_st->p_usr_data);
xml_incr_pars_res(p_st, XML_NO_END);
/* the copy position lies inside the current store: keep everything
** from there to the end of the store */
if( p_st->last_bfr.p &&
(p_st->p_copy > p_st->xml_os.p_begin) &&
(p_st->p_copy < p_st->xml_os.p_end) )
keep_size = p_st->xml_os.p_end - p_st->p_copy;
if (keep_size < p_st->last_bfr.len)
/* store the partial data to a temporary buffer,
NOT to the queue of buffers as it would overwrite current ones! */
XML_TRACE_DEBUG0("Store partial data\n");
/* NOTE(review): the destination size is passed as the literal 512 while
** the guard above compares against last_bfr.len - confirm both describe
** the partial_st buffer */
BCM_STRNCPY_S((char *)p_st->partial_st.last_bfr.p, 512, (char *)p_st->p_copy, keep_size);
p_st->partial_st.used_last_bfr= keep_size;
p_st->partial_st.last_bfr.p[keep_size] = 0;
p_st->partial_st.event_data.part.parse = p_st->pars_res;
p_st->partial_st.event_data.part.p_keep= p_st->last_bfr.p;
p_st->cback(XML_PARTIAL, &(p_st->event_data), p_st->p_usr_data);
xml_incr_pars_res(p_st, XML_NO_END);
/* otherwise append the whole current store after what is already held
** in the temporary buffer */
else if (p_st->last_bfr.p)
keep_size = p_st->xml_os.p_end - p_st->xml_os.p_begin;
/* locate the terminating 0 of the data already stored */
for (buffer_used = 0; buffer_used < 512; buffer_used++)
if (p_st->partial_st.last_bfr.p[buffer_used] == 0)
XML_TRACE_DEBUG1("buffer_used %d", buffer_used);
if ((buffer_used < 512) && (keep_size + buffer_used < p_st->last_bfr.len))
/* store the partial data to a temporary buffer,
NOT to the queue of buffers as it would overwrite current ones! */
XML_TRACE_DEBUG0("Store partial data\n");
BCM_STRNCPY_S((char *)p_st->partial_st.last_bfr.p + buffer_used, 512, (char *)p_st->xml_os.p_begin, keep_size);
p_st->partial_st.used_last_bfr= keep_size + buffer_used;
p_st->partial_st.last_bfr.p[keep_size + buffer_used] = 0;
p_st->partial_st.event_data.part.parse = p_st->pars_res;
p_st->partial_st.event_data.part.p_keep= p_st->last_bfr.p;
p_st->cback(XML_PARTIAL, &(p_st->event_data), p_st->p_usr_data);
xml_incr_pars_res(p_st, XML_NO_END);
XML_TRACE_DEBUG0("Insufficient temp buffer of size 512!!");
}/* else NO_PROP */
} /* end of partial */
} /* end of !XML_EOS(p_st) */
} /* end of res == XML_SUCCESS */
return p_st->pars_res;
/* Static Functions */
** Function xml_set_bfr
** Description
** Sets the buffer that is going to be used when tokens are pushed from
** p_st->next_token into some buffer in the buffer set.
** Parameters
** p_st (in/out) : the parser state
** p_bfr (in) : the buffer that will get all tokens (characters)
** NULL is allowed in which case no buffer is used.
** bfr_max_ind (in) : the max index into the buffer in which a non-null
** char may be stored
** Returns
** -
static void xml_set_bfr(tXML_MUL_STATE *p_st, UINT8 set)
p_st->name.p = p_st->p_cur - 1;
p_st->p_data_bfr = p_st->name.p;
p_st->name.len = 0;
p_st->value.p = p_st->p_cur - 1;
p_st->p_data_bfr = p_st->value.p;
p_st->value.len = 0;
p_st->p_data_bfr = NULL;
** Function xml_write_bfr
** Description
** Pushes (copies) the character from p_st->next_token to the buffer, if
** any, that has been set calling xml_set_bfr.
** Parameters
** p_st (in/out) : the parser state
** Returns
** -
static void xml_write_bfr(tXML_MUL_STATE *p_st)
if (p_st->p_data_bfr)
if(p_st->p_data_bfr == p_st->name.p)
** Function xml_incr_pars_res
** Description
** Sets the final parsing result if the new_res provided has
** higher rank than the current parsing result.
** Parameters
** p_st (in/out) : the parser state
** new_res (in) : the new parsing result
** Returns
** -
static void xml_incr_pars_res(tXML_MUL_STATE *p_st, tXML_RESULT new_res)
if (new_res > p_st->pars_res)
/* preserve these error messages */
case XML_NO_MEM: /* no last_bfr.p, and the tXML_MUL_STATE is not in init */
case XML_NO_PROP: /* run out of tXML_PROP */
case XML_ERR:
p_st->pars_res = new_res;
** Function xml_read_char
** Description
static void xml_read_char(tXML_MUL_STATE *p_st)
BOOLEAN get_new = FALSE;
if (p_st->p_cur && p_st->p_cur >= p_st->last_bfr.p && p_st->p_cur < (p_st->last_bfr.p + p_st->used_last_bfr))
/* left over from previous parse */
p_st->next_token = *p_st->p_cur;
if(p_st->next_token == 0)
/* leftover is done, use the new one */
p_st->p_cur = p_st->xml_os.p_begin;
p_st->last_bfr.p[0] = 0;
p_st->used_last_bfr = 0;
get_new = TRUE;
p_st->curr_res = 1;
if(p_st->p_cur == (p_st->last_bfr.p + p_st->used_last_bfr))
p_st->used_last_bfr = 0;
p_st->p_cur = p_st->xml_os.p_begin;
get_new = TRUE;
if(p_st->p_cur && p_st->p_cur < p_st->xml_os.p_end)
/* use buffer given to XML_Parse */
p_st->next_token = *p_st->p_cur;
p_st->curr_res = 1;
p_st->curr_res = 0;
XML_TRACE_DEBUG4("xml_read_char p_cur: x%x, curr_res:%d, get_new:%d, token:%c",
p_st->p_cur, p_st->curr_res, get_new, p_st->next_token);
** Function xml_get_next
** Description
** Writes the character in p_st->next_token to the current buffer if set.
** Then the next character is read from the Object Store into
** p_st->next_token. The first time get_next is called, the current
** buffer must be NULL, i.e p_st->data_bfr must be NULL.
** xml_get_next handles end-of-line as specified in the xml spec. It
** passes, skips or normalizes (p.29 in XML spec) white spaces (ws)
** as specified in the ws_op param. Note, the ws_op applies when
** getting one (or many characters) from Object Store into the
** p_st->next_token. It does not apply when pushing the (initial)
** p_st->next_token to the current buffer.
** The characters are read one by one from the Object Store.
** Presently this is not anticipated to cause any problems
** regarding reading speed. Should it become a problem in the
** future, a new buffer could be introduced into which a chunk
** of characters could be put, using one Object Store read call.
** The get_next function would then get the next character from
** the new buffer.
** Parameters
** p_st (in/out) : the parser state
** ws_op (in) : the requested white space handling.
** Returns
** True if a character was successfully read into p_st->next_token.
** False otherwise.
static BOOLEAN xml_get_next(tXML_MUL_STATE *p_st, tXML_WS_OP ws_op)
} while ((ws_op == XML_SKIP_WS) && XML_IS_WS(p_st->next_token) &&
/* handle end-of-line if any after the do-while above */
if (!XML_EOS(p_st) && (p_st->next_token == 0xA) && p_st->skip_next_nl)
{ /* we have previously found 0xD (cr) and have set the state var
** p_st->skip_next_nl,see below
p_st->skip_next_nl = FALSE;
if (XML_EOS(p_st))
p_st->next_token = 0;
return FALSE;
if (p_st->next_token == 0xD)
p_st->next_token = 0xA;
p_st->skip_next_nl = TRUE;
if ((ws_op == XML_NORM_WS) &&
((p_st->next_token == 0xA) || (p_st->next_token == 0x9)))
p_st->next_token = 0x20;
return TRUE;
** Function xml_find_ch
** Description
** Searches for the character given in ch. It starts searching in
** p_st->next_token and if not found it gets characters from the Object
** Store until ch is in p_st->next_token.
** Parameters
** p_st (in/out) : the parser state
** ch (in) : the character to search for
** ws_op (in) : the requested white space handling when getting chars
** Returns
** True if the character was found.
** False otherwise.
static BOOLEAN xml_find_ch(tXML_MUL_STATE *p_st, UINT8 ch, tXML_WS_OP ws_op)
while (!XML_EOS(p_st) && (p_st->next_token != ch))
xml_get_next(p_st, ws_op);
return (BOOLEAN) !XML_EOS(p_st);
** Function xml_find_ch_n
** Description
** Same function as xml_find_ch, except that any character in p_str
** that is found will stop the search.
** Parameters
** p_st (in/out) : the parser state
** p_str (in) : the string containing the characters searched for.
** Must not be NULL or an empty string.
** Returns
** True if any of the characters in p_str was found.
** Fase otherwise.
static BOOLEAN xml_find_ch_n(tXML_MUL_STATE *p_st, const UINT8 *p_str)
const UINT8 *p_tmp;
while (!XML_EOS(p_st))
for (p_tmp = p_str; *p_tmp; p_tmp++)
if (p_st->next_token == *p_tmp)
return TRUE;
xml_get_next(p_st, XML_PASS_WS);
return FALSE;
** Function xml_find_str
** Description
** Searches for p_str (i.e the exact sequence of characters in p_str) in
** the input from Object Store. The function ends with the character
** succeeding p_str in the input, (i.e that char is in p_st->next_token
** upon return) or with XML_EOS.
** Parameters
** p_st (in/out) : the parser state
** p_str (in) : the string to search for and pass by.
** Must not be NULL or an empty string.
** Returns
** True if the string was found.
** False otherwise.
static BOOLEAN xml_find_str(tXML_MUL_STATE *p_st, const UINT8 *p_str)
const UINT8 *p_tmp;
p_tmp = p_str;
while (*p_tmp && !XML_EOS(p_st))
for (p_tmp = p_str; *p_tmp && !XML_EOS(p_st); p_tmp++)
if (p_st->next_token != *p_tmp)
xml_get_next(p_st, XML_PASS_WS);
if ((p_tmp == p_str) && !XML_EOS(p_st))
xml_get_next(p_st, XML_PASS_WS);
return (BOOLEAN) (*p_tmp == 0);
** Function xml_consume_str
** Description
** Checks for p_str i.e that the first character from p_str is in
** p_st->next_token and that the successors immediately follows in the
** Object Store. The p_str must not be last in the Object Store.
** Parameters
** p_st (in/out) : the parser state
** p_str (in) : the string to check if present next and to pass by
** Must not be NULL.
** Returns
** True if the string was found and was not last in the Object Store.
** False otherwise.
static BOOLEAN xml_consume_str(tXML_MUL_STATE *p_st, const UINT8 *p_str)
if (p_st->next_token != *p_str)
return FALSE;
if (!xml_get_next(p_st, XML_PASS_WS))
return FALSE;
} while (*p_str);
return TRUE;
/*
** Function xml_resolve_refs
** Description
** Resolves predefined entity references (sect. 4.6 in the XML spec)
** and character references (sect. 4.1) that may be found in
** AttValue and content. (According to the XML spec they may also
** be in an EntityValue. However, EntityValues are in the
** doctypedecl part, which is not supported.)
** The AttValue and content not beginning with a tag must be
** stored in the p_st->value string.
** Parameters
** p_st (in/out) : the parser state
** Returns
** -
*/
/* Rewrites "&...;" references inside p_st->value in place: numeric
** character references become their byte value(s), and the predefined
** entities lt, gt, amp, apos and quot become the single character they
** name.
** NOTE(review): in this copy of the file the block delimiters, the do-loop
** head, the bodies of the empty for-loops and the loop that moves the rest
** of the string down after a resolved reference are not present - confirm
** against the original source before changing any logic.
** NOTE(review): strchr()/strcmp() are applied to p_st->value.p, which
** presumably requires the value to be 0-terminated - verify against callers. */
static void xml_resolve_refs(tXML_MUL_STATE *p_st)
UINT8 *p_srch; /* where next search for & starts */
UINT8 *p_am; /* points to found & */
UINT8 *p_sc; /* points to found ; and succeeding chars */
UINT8 *p_start;
UINT8 *p_tmp;
UINT32 ch_code;
UINT32 tmp_code;
INT8 i;
BOOLEAN resolved;
UINT16 len_left;
p_srch = p_st->value.p;
len_left = p_st->value.len;
p_start = p_srch;
/* find the next '&' and the ';' that follows it */
p_am = (UINT8*) strchr((char*) p_srch, XML_AM);
p_sc = p_am ? (UINT8*) strchr((char*) p_am, XML_SC) : NULL;
/* make sure the ptr does not exceed the end of the value str */
if(p_sc > (len_left + p_start))
p_sc = NULL;
if (p_am && p_sc)
resolved = FALSE;
p_tmp = p_am + 1;
*p_sc = 0; /* terminate the ref by replacing ; with 0 */
if (*p_tmp == XML_PD) /* character ref */
/* "#x.." is turned into "0x.." so that utl_strtoul() with base 0 sees a
** hex prefix; otherwise leading '0' digits are stepped over (presumably so
** that a decimal reference is not read as octal - confirm) */
if (p_tmp[1] == XML_HX)
*p_tmp = '0';
for(p_tmp++; *p_tmp == '0'; p_tmp++)
ch_code = utl_strtoul((char*) p_tmp, NULL, 0);
/* skip leading zero bytes */
for (i = 3; (i >= 0) && !(ch_code >> i * 8); i--)
/* the resulting byte(s) are written over the reference, starting at '&' */
p_tmp = p_am;
while (i >= 0)
/* mask out one byte and shift it rightmost */
/* preceding bytes must be zero so shift left first */
tmp_code = ch_code << ((3-i) * 8);
*p_tmp = (UINT8) (tmp_code >> 24);
resolved = TRUE;
else if (p_tmp < p_sc) /* check if predefined ref */
resolved = TRUE;
/* each predefined entity is replaced by one character at '&'; value.len
** and p_cur are reduced by the number of characters that disappear */
if (strcmp((char*) p_tmp, XML_LT_STR) == 0)
*p_am = XML_ST;
p_st->value.len = p_st->value.len - 3; /* remove the length for lt; */
p_st->p_cur = p_st->p_cur - 3;
else if (strcmp((char*) p_tmp, XML_GT_STR) == 0)
*p_am = XML_GT;
p_st->value.len = p_st->value.len - 3; /* remove the length for gt; */
p_st->p_cur = p_st->p_cur - 3;
else if (strcmp((char*) p_tmp, XML_AMP_STR) == 0)
*p_am = XML_AM;
p_st->value.len = p_st->value.len - 4; /* remove the length for amp; */
p_st->p_cur = p_st->p_cur - 4;
else if (strcmp((char*) p_tmp, XML_APOS_STR) == 0)
*p_am = XML_SQ;
p_st->value.len = p_st->value.len - 5; /* remove the length for apos; */
p_st->p_cur = p_st->p_cur - 5;
else if (strcmp((char*) p_tmp, XML_QUOT_STR) == 0)
*p_am = XML_DQ;
p_st->value.len = p_st->value.len - 5; /* remove the length for quot; */
p_st->p_cur = p_st->p_cur - 5;
resolved = FALSE;
if (resolved)
p_srch = p_tmp; /* will contain char after ; */
*p_tmp++ = *p_sc++;
/* unresolved reference: put the ';' back and continue searching after it */
*p_sc = XML_SC; /* restore the ref end */
p_srch = p_sc + 1;
} /* end if */
} while (*p_srch && p_am && p_sc);
** Function xml_remove_trail_ws
** Description
** Removes trailing white space from the p_st->p_data_bfr buffer.
** Parameters
** p_st (in/out) : the parser state
** Returns
** -
static void xml_remove_trail_ws(tXML_MUL_STATE *p_st)
UINT16 xx;
xx = p_st->value.len;
while(xx && XML_IS_WS(p_st->value.p[xx-1]))
p_st->value.len = xx;
/* Parsing Static Functions */
** Function xml_name
** Description
** Parses a name and its prefix if any. The prefix and name buffers
** are set.
** The functions ends with either white space,
** XML_EQ, XML_EM or XML_GT in p_st->next_token or with XML_EOS.
** Parameters
** p_st (in/out) : the parser state
** Returns
** True if no error was found.
** False otherwise.
static BOOLEAN xml_name(tXML_MUL_STATE *p_st)
p_st->prefix.p = NULL;
p_st->prefix.len = 0;
xml_set_bfr(p_st, XML_SET_NAME);
xml_find_ch_n(p_st, xml_name_srch);
if (!XML_EOS(p_st) && (p_st->next_token == XML_CO))
if (p_st->name.len)
found = TRUE;
/* p_st->name.len is string size in name buffer, \0 excl.
p_st->prefix.p = p_st->name.p;
p_st->prefix.len = p_st->name.len;
xml_get_next(p_st, XML_PASS_WS);
xml_set_bfr(p_st, XML_SET_NAME);
if (!XML_EOS(p_st))
xml_find_ch_n(p_st, xml_name_srch + 1);
found = (BOOLEAN) (found || p_st->name.len);
xml_set_bfr(p_st, XML_SET_CLEAR);
return found;
** Function xml_attributes
** Description
** Parses an attribute list.
** The functions ends with the XML_GT or XML_EM char or XML_EOS.
** Error is reported if the attribute list is last in the Object
** Store.
** Sends a XML_ATTRIBUTE event in the user callback for each
** attribute found.
** Parameters
** p_st (in/out) : the parser state
** Returns
** True if no error was found.
** False otherwise.
static BOOLEAN xml_attributes(tXML_MUL_STATE *p_st)
BOOLEAN cb_ret = TRUE;
UINT8 q_ch;
XML_TRACE_DEBUG1("[xml_parse] xml_attributes: res= %d", p_st->pars_res);
while ( cb_ret)
/* if this is a white space, then the next character is read from the
Object Store into p_st->next_token */
if( XML_IS_WS(p_st->next_token) )
if (!xml_get_next(p_st, XML_SKIP_WS))
return FALSE;
if (p_st->next_token == XML_EQ)
return FALSE;
if ((p_st->next_token == XML_GT) || (p_st->next_token == XML_EM))
return TRUE;
if (!xml_name(p_st) || XML_EOS(p_st))
return FALSE;
if (!xml_get_next(p_st, XML_SKIP_WS))
return FALSE;
if (p_st->next_token != XML_EQ)
return FALSE;
if (!xml_get_next(p_st, XML_SKIP_WS))
return FALSE;
if ((p_st->next_token != XML_SQ) && (p_st->next_token != XML_DQ))
return FALSE;
q_ch = p_st->next_token;
if (!xml_get_next(p_st, XML_PASS_WS))
return FALSE;
xml_set_bfr(p_st, XML_SET_VALUE);
if (!xml_find_ch(p_st, q_ch, XML_NORM_WS))
return FALSE;
xml_set_bfr(p_st, XML_SET_CLEAR);
p_st->event_data.attr.prefix.p = p_st->prefix.p;
p_st->event_data.attr.prefix.len = p_st->prefix.len;
p_st-> = p_st->name.p;
p_st-> = p_st->name.len;
p_st->event_data.attr.value.p = p_st->value.p;
p_st->event_data.attr.value.len = p_st->value.len;
p_st->value.len = 0;
cb_ret = p_st->cback(XML_ATTRIBUTE, &(p_st->event_data), p_st->p_usr_data);
/* TODO chk cback return */
if(cb_ret == FALSE)
xml_incr_pars_res(p_st, XML_NO_PROP);
if (!xml_get_next(p_st, XML_PASS_WS))
return FALSE;
** Function xml_elems
** Description
** Parses all elements with all their content.This function is not a
** one-to-one mapped implementation of one production from the XML spec.
** Instead it uses a simplified iterative (as opposed to recursive)
** approach when parsing both the element and content productions.
** When a parsing error is found, this function tries to recover by
** searching for the next element (tag).
** When char data is found, the function sends the XML_CHARDATA event in
** the user callback.
** Other static functions with production names, start their parsing
** from the first character in their production. They might check
** that the first character (token) in the production matches
** p_st->next_token, alternatively they can just get rid of the first
** char in the production by calling get_next_ch. The exceptions to this
** are the xml_qm_elem, xml_ex_elem, xml_etag_elem and the xml_tag_elem
** functions which starts with the XML_QM, XML_EX, XML_EM and the first
** char in the tag name, respectively.
** Parameters
** p_st (in/out) : the parser state
** prev_ok (in) : if parsing done before calling this function was
** ok. If not, the functions starts with recovering.
** Returns
** True if parsing was successful possibly with successful recoveries.
** False if an error was found from which recovery failed (XML_EOS).
static BOOLEAN xml_elems(tXML_MUL_STATE *p_st, BOOLEAN prev_ok)
BOOLEAN tag_found;
BOOLEAN cb_ret = TRUE;
while (!XML_EOS(p_st) && prev_ok)
/* remove leading ws even if char data */
if (XML_IS_WS(p_st->next_token))
if (!xml_get_next(p_st, XML_SKIP_WS))
return TRUE;
tag_found = (BOOLEAN) (p_st->next_token == XML_ST);
if (!tag_found)
xml_set_bfr(p_st, XML_SET_VALUE);
tag_found = xml_find_ch(p_st, XML_ST, XML_PASS_WS);
if (p_st->value.len > 0)
p_st->event_data.ch_data.value.p = p_st->value.p;
p_st->event_data.ch_data.value.len = p_st->value.len;
p_st->event_data.ch_data.last = TRUE;
p_st->value.len = 0;
cb_ret = p_st->cback(XML_CHARDATA, &(p_st->event_data), p_st->p_usr_data);
/* TODO chk cback return */
if(cb_ret == FALSE)
xml_incr_pars_res(p_st, XML_NO_PROP);
return FALSE;
xml_set_bfr(p_st, XML_SET_CLEAR);
if (!tag_found)
return prev_ok;
p_st->p_last_stm = p_st->p_cur - 1;
if (p_st->p_cur)
p_st->p_copy = p_st->p_last_stm;
p_st->cback(XML_TOP, &(p_st->event_data), p_st->p_usr_data);
/* tag was found */
if (!xml_get_next(p_st, XML_PASS_WS))
return FALSE;
if (p_st->next_token == XML_QM)
prev_ok = xml_qm_elem(p_st);
else if (p_st->next_token == XML_EX)
prev_ok = xml_ex_elem(p_st, FALSE);
else if (p_st->next_token == XML_EM)
prev_ok = xml_etag_elem(p_st);
prev_ok = xml_tag_elem(p_st);
if (!prev_ok)
xml_incr_pars_res(p_st, XML_ERR);
XML_TRACE_DEBUG1("xml_elems prev_ok:%d", prev_ok);
return prev_ok;
** Function xml_qm_elem
** Description
** Recognises all productions starting with "<?". That is PI and XML decl.
** These productions are skipped and XML_WARNING is set.
** The function starts with the XML_QM as the first char (is in
** p_st->next_token).It ends with the XML_GT successor (is in
** p_st->next_token) or XML_EOS.
** Parameters
** p_st (in/out) : the parser state
** Returns
** True if no error was found trying to recognise the start and end of
** the productions. False otherwise.
static BOOLEAN xml_qm_elem(tXML_MUL_STATE *p_st)
if (!xml_get_next(p_st, XML_PASS_WS))
return FALSE;
if (!xml_find_str(p_st, (UINT8*) XML_QTAG_END_STR))
return FALSE;
xml_incr_pars_res(p_st, XML_WARNING);
return TRUE;
** Function xml_ex_elem
** Description
** Handles all productions starting with "<!". They are Comments, CDSect
** doctypedecl and markupdecl. All are skipped. However, the inpar
** prolog must be set for the function to try to detect the doctypedecl
** and markupdecl beginning.
** The function starts with the XML_EX as the first char.
** The function ends with XML_EOS or the char succeeding XML_GT,
** except for doctypedecl and marcupdecl which ends with the next XM_TAG.
** Parameters
** p_st (in/out) : the parser state
** prolog (in) : should be set if in prolog in which case the function
** tries to detect (allows) the beginning of doctypedecl
** and markupdecl.
** Returns
** True if no error was found trying to recognise the start and end of
** the productions. False otherwise.
static BOOLEAN xml_ex_elem(tXML_MUL_STATE *p_st, BOOLEAN prolog)
UINT8 q_ch;
if (!xml_get_next(p_st, XML_PASS_WS))
return FALSE;
if (p_st->next_token == XML_HY) /* comment */
if (!xml_consume_str(p_st, (UINT8*) XML_COMM_STR))
return FALSE;
if (!xml_find_str(p_st, (UINT8*) XML_COMM_END_STR))
return FALSE;
else if (p_st->next_token == XML_LB) /* CDSect */
if (!xml_consume_str(p_st, (UINT8*) XML_CDS_STR))
return FALSE;
if (!xml_find_str(p_st, (UINT8*) XML_CDS_END_STR))
return FALSE;
xml_incr_pars_res(p_st, XML_WARNING);
else if (prolog) /* doctypedecl or markupdecl */
if (!xml_find_ch_n(p_st, (UINT8*) XML_DOCT_STR))
return FALSE;
if ((p_st->next_token == XML_SQ) || (p_st->next_token == XML_DQ))
q_ch = p_st->next_token;
if (!xml_get_next(p_st, XML_PASS_WS))
return FALSE;
if (!xml_find_ch(p_st, q_ch, XML_PASS_WS))
return FALSE;
xml_get_next(p_st, XML_PASS_WS);
} while (!XML_EOS(p_st) && (p_st->next_token != XML_ST));
xml_incr_pars_res(p_st, XML_WARNING);
else /* error */
return FALSE;
return TRUE;
** Function xml_tag_elem
** Description
** Parses a tag element. The function starts with the char succeeding the
** XML_ST char.
** The functions ends with the char succeeding the XML_GT char or
** with XML_EOS.
** Sends the XML_TAG and the XML_TAG_END events in a callback each.
** Parameters
** p_st (in/out) : the parser state
** Returns
** True if no error was found.
** False otherwise.
static BOOLEAN xml_tag_elem(tXML_MUL_STATE *p_st)
BOOLEAN cb_ret = TRUE;
if (!xml_name(p_st))
return FALSE;
p_st->event_data.tag.prefix.p = p_st->prefix.p;
p_st-> = p_st->name.p;
p_st->event_data.tag.prefix.len = p_st->prefix.len;
p_st-> = p_st->name.len;
p_st->event_data.tag.p_last_stm = p_st->p_last_stm;
cb_ret = p_st->cback(XML_TAG, &(p_st->event_data), p_st->p_usr_data);
if(cb_ret == FALSE)
xml_incr_pars_res(p_st, XML_NO_PROP);
return FALSE;
/* TODO chk cback return */
if (XML_EOS(p_st))
return FALSE;
if (XML_IS_WS(p_st->next_token))
if (!xml_attributes(p_st))
return FALSE;
p_st->event_data.empty_elem.end = (BOOLEAN) (p_st->next_token == XML_EM);
if (p_st->event_data.empty_elem.end)
if (!xml_get_next(p_st, XML_PASS_WS))
return FALSE;
if (p_st->next_token != XML_GT)
return FALSE;
xml_get_next(p_st, XML_PASS_WS);
cb_ret = p_st->cback(XML_TAG_END, &(p_st->event_data), p_st->p_usr_data);
if(cb_ret == FALSE)
xml_incr_pars_res(p_st, XML_NO_PROP);
return FALSE;
p_st->p_copy = p_st->p_cur - 1;
p_st->cback(XML_TOP, &(p_st->event_data), p_st->p_usr_data);
/* TODO chk cback return */
return TRUE;
** Function xml_etag_elem
** Description
** Parses an end tag element. The function starts with the XML_EM char.
** The functions ends with the char succeeding the XML_GT char or
** with XML_EOS. Sends the XML_ETAG event in the user callback.
** Parameters
** p_st (in/out) : the parser state
** Returns
** True if no error was found.
** False otherwise.
static BOOLEAN xml_etag_elem(tXML_MUL_STATE *p_st)
BOOLEAN cb_ret = TRUE;
if (!xml_get_next(p_st, XML_PASS_WS))
return FALSE;
if (!xml_name(p_st))
return FALSE;
p_st->event_data.etag.prefix.p = p_st->prefix.p;
p_st-> = p_st->name.p;
p_st-> = p_st->name.len;
p_st->event_data.etag.prefix.len = p_st->prefix.len;
cb_ret = p_st->cback(XML_ETAG, &(p_st->event_data), p_st->p_usr_data);
if(cb_ret == FALSE)
xml_incr_pars_res(p_st, XML_NO_PROP);
return FALSE;
p_st->p_copy = (p_st->prefix.p) ? p_st->prefix.p - 2: p_st->name.p - 2;
p_st->cback(XML_TOP, &(p_st->event_data), p_st->p_usr_data);
/* TODO chk cback return ??? */
if (XML_EOS(p_st))
return FALSE;
if (XML_IS_WS(p_st->next_token))
if (!xml_get_next(p_st, XML_SKIP_WS))
return FALSE;
if (p_st->next_token != XML_GT)
return FALSE;
xml_get_next(p_st, XML_PASS_WS);
return TRUE;