// $items is an array of arrays, with each array element representing an <item>.
// Each outer array element is itself an associative array
// with keys ("title", "link", "description").
$items = array();
/**
 * Opening tag handler.
 *
 * Stores the tag name in the global $currentTag and updates the global
 * $flag: 1 when an <item> block is entered, 2 when a <channel> block is
 * entered. Any other tag leaves $flag unchanged.
 *
 * @param mixed  $parser     parser handle supplied by the caller (not used here)
 * @param string $name       element name; compared against "ITEM" and "CHANNEL"
 * @param array  $attributes element attributes (not used here)
 */
function elementBegin($parser, $name, $attributes)
{
    global $currentTag, $flag;

    $currentTag = $name;

    // set flag if entering <channel> or <item> block
    if ($name == "ITEM") {
        $flag = 1;
    } else if ($name == "CHANNEL") {
        $flag = 2;
    }
}
/**
 * Closing tag handler.
 *
 * Clears the global $currentTag on every closing tag. When an <item> block
 * is closed, the global $count is incremented and $flag is reset to 0;
 * when a <channel> block is closed, only $flag is reset to 0.
 *
 * @param mixed  $parser parser handle supplied by the caller (not used here)
 * @param string $name   element name; compared against "ITEM" and "CHANNEL"
 */
function elementEnd($parser, $name)
{
    global $currentTag, $flag, $count;

    $currentTag = "";

    // set flag if exiting <channel> or <item> block
    if ($name == "ITEM") {
        // the next <item> is stored under the next index
        $count++;
        $flag = 0;
    } else if ($name == "CHANNEL") {
        $flag = 0;
    }
}
/**
 * Character data handler.
 *
 * When the current tag is TITLE, LINK or DESCRIPTION, the HTML-escaped and
 * trimmed text is appended to $items[$count][<key>] (if $flag is 1) or to
 * $channel[<key>] (if $flag is 2), where <key> is the lower-cased tag name.
 * Text seen under any other tag, or with any other $flag value, is dropped.
 *
 * @param mixed  $parser parser handle supplied by the caller (not used here)
 * @param string $data   chunk of character data
 */
function characterData($parser, $data)
{
    global $currentTag, $flag, $items, $count, $channel;

    // NOTE(review): each chunk is trimmed separately, so if one text node is
    // delivered in several calls, whitespace at the chunk borders is lost.
    $data = trim(htmlspecialchars($data));

    if ($currentTag == "TITLE" || $currentTag == "LINK" || $currentTag == "DESCRIPTION") {
        $key = strtolower($currentTag);

        // add data to $channel[] or $items[] array
        if ($flag == 1) {
            // create the slot first so the append below never reads an undefined key
            if (!isset($items[$count][$key])) {
                $items[$count][$key] = "";
            }
            $items[$count][$key] .= $data;
        } else if ($flag == 2) {
            if (!isset($channel[$key])) {
                $channel[$key] = "";
            }
            $channel[$key] .= $data;
        }
    }
}
// create parser
$xp = xml_parser_create();

// register the tag and character data callbacks
xml_set_element_handler($xp, "elementBegin", "elementEnd");
xml_set_character_data_handler($xp, "characterData");

// element names are upper-cased before the handlers see them,
// which is why the handlers compare against "ITEM", "CHANNEL", ...
xml_parser_set_option($xp, XML_OPTION_CASE_FOLDING, TRUE);
xml_parser_set_option($xp, XML_OPTION_SKIP_WHITE, TRUE);

// read XML file
if (!($fp = fopen($file, "r"))) {
    die("Could not read $file");
}

// parse data in 4 KB chunks; the third argument tells the parser
// when the final chunk has been handed over
while ($xml = fread($fp, 4096)) {
    if (!xml_parse($xp, $xml, feof($fp))) {
        die("XML parser error: " . xml_error_string(xml_get_error_code($xp)));
    }
}

// release the file handle once everything has been read
fclose($fp);

// destroy parser
xml_parser_free($xp);

// now iterate through $items[] array
// and print each item as a table row
foreach ($items as $item) {
    // an <item> may lack one of the three elements; fall back to ""
    $link = isset($item["link"]) ? $item["link"] : "";
    $title = isset($item["title"]) ? $item["title"] : "";
    $description = isset($item["description"]) ? $item["description"] : "";

    // the href value is quoted so a URL containing whitespace stays one attribute
    echo '<tr><td><a href="' . $link . '">' . $title . "</a><br>" . $description . "</td></tr>";
}
?>
</table>
</body>
</html>
与先前的那段的主要区别在于,这段脚本创建了两个数组,用于保存分析过程中所提取的信息。其中,$channel是联合性数组(associative array),存放被处理的频道的基本描述信息,而$items是一个二维数组,包含关于单独的频道条目(channel items)的信息。$items数组中的每一个元素本身又是一个联合性数组,包含title、link和description三个键。$items数组中元素总数与RDF文档中的<item>区块总数相同。
还需注意$flag变量的变化,根据被处理的是<channel></channel>区块还是<item></item>区块,它现在保存两个值。这一点很有必要,因为只有这样,分析器才能把信息放入正确的数组里面。
一旦文档分析完毕,事情就简单了——遍历$items数组,以表格形式打印其中的每一个条目(item)。运行结果如下:
7)返回到类(Back To Class)
既然你有这么大的权力,那么究竟为什么要把自己限制在仅仅是单个的RDF来源呢?就象我早先说过的一样,大多数主要的站点都经常为他们所提供的内容做快照。其实将所有这些不同的来源插入到你的站点当中是相当简单的。让我们看看是如何做的。
首先,我们把前面例子中的代码模块化。这样一来,你就无须为每一个单个的来源都一遍又一遍的重写相同的代码了。简化的方法就是将之打包成类,再把这个类包含到我的PHP脚本当中。
类代码如下:
<?
class RDFParser
{
//
// variables
//
// set up local variables for this class
var $currentTag = "";
var $flag = "";
var $count = 0;
// this is an associative array of channel data with keys
// ("title", "link", "description")
var $channel = array();
// this is an array of arrays, with each array element
// representing an <item>
// each outer array element is itself an associative array
// with keys ("title", "link", "description")
var $items = array();
//
// methods
//
// set the name of the RDF file to parse
// this is usually a local file
// you may set it to a remote file if your PHP build supports
// URL fopen()
//
// $file - path (or URL) of the RDF document; stored in $this->file
function setResource($file)
{
    $this->file = $file;
}
// parse the RDF file set with setResource()
// this populates the $channel and $items arrays
function parseResource()
{
// create parser
$this->xp = xml_parser_create(),
// set object reference
xml_set_object($this->xp,
$this),
// set handlers and parser options
xml_set_element_handler($this->xp,
"elementBegin",
"elementEnd"),
xml_set_character_data_handler($this->xp,
"characterData"),
xml_parser_set_option($this->xp,
XML_OPTION_CASE_FOLDING, TRUE),
xml_parser_set_option($this->xp,
XML_OPTION_SKIP_WHITE,
TRUE),
// read XML file