#include <stdio.h>
#include <string.h>

#include <iostream>
#include <string>

#include <curl/curl.h>
#include <pcre.h>
/* Number of int slots in the output vector handed to pcre_exec();
 * should be a multiple of 3. */
#define oveccount 30
/* NOTE(review): ebuflen is not referenced by name in the visible code. */
#define ebuflen 128
/* NOTE(review): buflen is not referenced by name in the visible code. */
#define buflen 10240
using namespace std;
/**
 * libcurl write callback: appends the received chunk to a std::string.
 *
 * @param buffer Start of the received data (not NUL-terminated).
 * @param size   Size of one data item in bytes.
 * @param nmemb  Number of data items in buffer.
 * @param str    Pointer to the std::string that accumulates the data.
 * @return The number of bytes consumed (size * nmemb), or (size_t)-1 when
 *         either pointer is null; a value different from size * nmemb makes
 *         libcurl abort the transfer.
 */
size_t onwritedata(void *buffer, size_t size, size_t nmemb, void *str)
{
    if (!str || !buffer) {
        return static_cast<size_t>(-1);
    }

    // libcurl expects the byte count back, not the item count.
    const size_t total = size * nmemb;
    std::string *result = static_cast<std::string *>(str);
    result->append(static_cast<const char *>(buffer), total);
    return total;
}
//获取页面
int getweb(string url, string & result)
{
long code = 0;
string htmlpage;
curl * curl = curl_easy_init();
curl_easy_setopt(curl, curlopt_url, url.c_str()); //设置url
curl_easy_setopt(curl, curlopt_post, 0); //设置请求方法
curl_easy_setopt(curl, curlopt_useragent, "mozilla/5."); //伪装客户端
curl_easy_setopt(curl, curlopt_writedata, &htmlpage); //设置接受返回结果字符串
curl_easy_setopt(curl, curlopt_writefunction, onwritedata); //设置处理方法
curl_easy_perform(curl); //请求
curl_easy_getinfo(curl, curlinfo_response_code, &code);
if (code == 200)
{
cout << "request success" << endl;
result = htmlpage;
//cout<<htmlpage<<endl;
}
curl_easy_cleanup(curl);
return code;
}
int main(int argc, char * *argv)
{
pcre * re;
constchar * error;
int erroffset;
int ovector[oveccount];
int rc,
i;
string url = "https://www.dianping.com/search/category/212/10/g103";
string html;
getweb(url, html);
//char src[] = " ";
//char pattern[] = "(<a>.+?</a>)";
constchar * src = html.c_str();
char pattern[] = "(<li class=\"\"[\\s\\s]*?</li>)";
printf("string : %s\n", src);
printf("pattern: \"%s\"\n", pattern);
re = pcre_compile(pattern, 0, &error, &erroffset, null);
if (re == null) {
printf("pcre compilation failed at offset %d: %s\n", erroffset, error);
return1;
}
char * p = (char * ) src;
while ((rc = pcre_exec(re, null, p, strlen(p), 0, 0, ovector, oveccount)) != pcre_error_nomatch)
{
printf("\nok, %d matched ...\n\n", rc);
for (i = 0; i < rc - 1; i++)
{
char * substring_start = p + ovector[2 * i];
int substring_length = ovector[2 * i + 1] - ovector[2 * i];
char matched[10240];
memset(matched, 0, 10240);
strncpy(matched, substring_start, substring_length);
printf("match:%s\n", matched);
}
p += ovector[1];
if (!p)
{
break;
}
}
pcre_free(re);
return0;
}
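/*
 * Web-page UI text captured together with this listing; not part of the
 * program. Kept verbatim:
 *
 * 用户登录
 * 还没有账号?立即注册
 * 用户注册
 * 投稿取消
 * | 文章分类: |
 * |
 * 还能输入300字
 * 上传中....
 * 情不知所起一往而深x
 */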