#include "feed_parser.h"

namespace {

/// Wraps `text` in single quotes for /bin/sh, escaping embedded single quotes,
/// so the result is always passed to the command as exactly one argument.
std::string shell_quote(const std::string& text)
{
    std::string quoted = "'";
    for (std::string::size_type i = 0; i < text.length(); ++i) {
        if (text[i] == '\'') {
            quoted += "'\\''";   // close quote, escaped quote, reopen quote
        } else {
            quoted += text[i];
        }
    }
    quoted += "'";
    return quoted;
}

} // namespace

/// Creates a parser that is not currently inside an opening tag.
feed_parser::feed_parser()
{
    tag_open = false;
}

feed_parser::~feed_parser()
{
}

/// Reads the text between the current stream position and the next '<'.
///
/// The '<' itself is left in the stream. Trailing spaces, tabs, CRs and LFs
/// are removed from the collected text (leading whitespace is not touched).
///
/// @param sContainerData  receives the collected text (always overwritten).
/// @param input           stream to read from.
/// @return CONTAINER_OK when a '<' was found, CONTAINER_OPEN when the stream
///         failed or ended before one was seen.
feed_parser::container_error feed_parser::get_container_data(string& sContainerData, istream* input)
{
    sContainerData = "";
    bool reached_tag = false;

    for (;;) {
        // Keep the result as int so that bytes >= 0x80 are not confused with EOF.
        const int current = input->get();
        if (input->fail()) {
            break;
        }
        if (current == '<') {
            // Only put back a character that was really read; the caller
            // expects to see the '<' that starts the next tag.
            input->putback('<');
            reached_tag = true;
            break;
        }
        sContainerData += static_cast<char>(current);
    }

    // find_last_not_of() returns npos for all-whitespace/empty text; npos + 1
    // wraps to 0, which erases everything and leaves an empty string.
    sContainerData.erase(sContainerData.find_last_not_of(" \n\r\t") + 1);

    return reached_tag ? CONTAINER_OK : CONTAINER_OPEN;
}

/// Reads a tag name from the stream, up to and including the closing '>'.
///
/// The caller is expected to have consumed the leading '<' (and '/' for a
/// closing tag). Leading whitespace is skipped. The name ends at the first
/// whitespace character or '>'; anything between that whitespace and the
/// closing '>' (i.e. attributes) is discarded.
///
/// @param sOutput  receives the tag name (always overwritten).
/// @param input    stream to read from.
/// @return always 0.
int feed_parser::get_tag(string& sOutput, istream* input)
{
    sOutput = "";

    // Skip whitespace BEFORE reading the first character of the name, so that
    // whitespace following a one-character name still terminates the name.
    eatwhitespace(input);

    for (;;) {
        const int current = input->get();
        if (input->fail() || current == '>') {
            break;
        }
        if (isspace(current)) {
            // End of the name: discard attributes up to the closing '>'.
            // The tag is finished afterwards even if '>' follows immediately.
            int skipped;
            do {
                skipped = input->get();
            } while (!input->fail() && skipped != '>');
            break;
        }
        sOutput += static_cast<char>(current);
    }

    return 0;
}

/// Scans the stream tag by tag and prints selected fields of every <item>.
///
/// For each opening tag found inside an <item> element the following is
/// written to standard output, followed by the tag's text content:
///   - "title ---> "  for <title>
///   - "pubdate -> "  for <pubDate>
///   - "dcdate -> "   for <dc:date>
/// Tag names are compared case-insensitively. Once one of the two date tags
/// has been seen, the other kind is ignored for the remainder of the parse.
///
/// @param input  stream to read from; parsing stops when it fails.
void feed_parser::parse_feed(istream* input)
{
    string tag = "";
    string container_data = "";
    bool inside_an_item_tag = false;
    bool pubdate = false;
    bool dcdate = false;

    while (!input->fail()) {
        if ('<' != char(input->get())) {
            continue;   // not at the start of a tag yet
        }

        if ('/' != char(input->peek())) {
            tag_open = true;
        } else {
            input->get();   // consume the '/'
            tag_open = false;
        }

        eatwhitespace(input);
        get_tag(tag, input);
        eatwhitespace(input);

        // Normalise the case for opening AND closing tags so that e.g.
        // </ITEM> closes an item just like </item>.
        transform(tag.begin(), tag.end(), tag.begin(), ToLower());

        if (!tag_open) {
            if (tag == "item") {
                inside_an_item_tag = false;
            }
            continue;
        }

        get_container_data(container_data, input);

        if (tag == "item") {
            inside_an_item_tag = true;
        } else if (inside_an_item_tag) {
            if (tag == "title") {
                cout << "title ---> ";
                display_container_data(container_data);
            } else if ((!dcdate) && (tag == "pubdate")) {
                pubdate = true;
                cout << "pubdate -> ";
                display_container_data(container_data);
            } else if ((!pubdate) && (tag == "dc:date")) {
                dcdate = true;
                cout << "dcdate -> ";
                display_container_data(container_data);
            }
        }
    }
}

/// Prints `data` followed by a newline on standard output; prints nothing
/// at all when `data` is empty.
void
feed_parser::display_container_data(string data)
{
    if (data.length() > 0) {
        cout << data << endl;
    }
}

/// Parses the feed stored in the file named `inputfile`.
///
/// If the file cannot be opened the stream is in a failed state and nothing
/// is parsed. A NULL path is ignored.
void feed_parser::parse(char* inputfile)
{
    if (inputfile == NULL) {
        return;
    }

    // Stack object: the file is closed automatically when it goes out of scope.
    ifstream input(inputfile);
    parse_feed(&input);
}

/// Parses a feed from an already opened stream. The stream is not closed.
void feed_parser::parse_stream(istream* inputfile)
{
    parse_feed(inputfile);
}

/// Downloads `url` with wget and parses the downloaded document as a feed.
///
/// wget's error output is discarded. If the wget process cannot be started,
/// an error is written to standard error and the program exits with status 1.
void feed_parser::parse_url(string url)
{
    // The URL is quoted so that shell metacharacters ('&', ';', '$', ...) are
    // passed to wget verbatim instead of being interpreted by the shell, and
    // "--" stops wget from treating a URL starting with '-' as an option.
    const string command = "wget -O- -- " + shell_quote(url) + " 2>/dev/null";

    FILE* fp = popen(command.c_str(), "r");
    if (fp == NULL) {
        std::cerr << "feed_parser: unable to run wget for " << url << '\n';
        exit(1);
    }

    {
        // Scoped so that the stream objects are destroyed before the pipe
        // is closed. stdio_filebuf does not take ownership of the FILE*.
        __gnu_cxx::stdio_filebuf<char> ibuf(fp, std::ios_base::in, 1);
        std::istream in(&ibuf);
        parse_stream(&in);
    }

    pclose(fp);
}

/// Consumes consecutive whitespace characters from the stream, leaving the
/// first non-whitespace character (if any) unread.
void feed_parser::eatwhitespace(istream* input)
{
    for (;;) {
        // peek() yields an int in unsigned-char range or EOF, which is the
        // only kind of value isspace() is defined for.
        const int next = input->peek();
        if (next == istream::traits_type::eof() || !isspace(next)) {
            return;
        }
        input->get();
    }
}