我刚刚实现了一个 HTTP/1.1 客户端来解析分 block 传输编码。但是,它适用于某些网站,但对其他网站无效。我假设我需要为每个 block 数据读取 chunkSize + 2 字节,包括 \r\n,对吗?
这是我的代码:
while(chunked)//if detecting chunked in the header before, this is true
{
//getLine is a function can read a line separated by \r\n
//sockfd is a socket created before and file position is at the start of HTTP body (after that blank line between header and body)
line = getLine(sockfd);
printf("%s", line);//print the chunk size line in hex
int chunkSize = strtol(line, NULL, 16);
if(chunkSize == 0)
{
printf("##### Read chunk size of 0, reading until we hit end of stream.\n");
break;
}
printf("##### Chunk size (in hex above) is %d in decimal and is printed here:\n", chunkSize);
char* chunkBuf = (char *)malloc(chunkSize + 2 + 1);//2 for \r\n, 1 for \0
bzero(chunkBuf, chunkSize + 3);
if(read(sockfd, chunkBuf, chunkSize + 2) == 0)//sockfd is a socket created before
{
perror("Read Error: ");
exit(EXIT_FAILURE);
}
printf("%s", chunkBuf);//print the chunk content
free(chunkBuf);
}
实际上我可以不解析就打印出全部内容,即逐行打印,所以我想我可能在上面的代码中犯了一些错误,谁能给我一些提示?
下面是完整的代码供您引用:
#include <stdio.h>
#include <sys/socket.h>
#include <arpa/inet.h>
#include <stdlib.h>
#include <netdb.h>
#include <netinet/in.h>
#include <string.h>
#include <stdbool.h>
#include <unistd.h>
#define HTTP_VERSION "HTTP/1.1"
#define PAGE "/"
int createSokect()
{
int socketfd = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
if(socketfd < 0)
{
perror("Cannot create socket\n");
exit(EXIT_FAILURE);
}
return socketfd;
}
char* getIP(char* host)
{
struct hostent* hent;
int len = 15;//xxx.xxx.xxx.xxx
char *ipaddr = (char *)malloc(len + 1);//one more \0
bzero(ipaddr, len + 1);
if((hent = gethostbyname(host)) == NULL)
{
printf("Cannot get IP for this host: %s\n", host);
exit(EXIT_FAILURE);
}
if(inet_ntop(AF_INET, (void*)hent->h_addr_list[0], ipaddr, len) == NULL)
{
printf("Cannot resolve IP for this host: %s\n", host);
exit(EXIT_FAILURE);
}
return ipaddr;
}
char* createQuery(char* host, char* page)
{
char* msg = "GET %s %s\r\nHost: %s\r\nConnection: close\r\n\r\n";
char* query = (char *)malloc(strlen(host) + strlen(page) + strlen(msg) + strlen(HTTP_VERSION) - 6 + 1);//-6: %s %s %s
sprintf(query, msg, page, HTTP_VERSION, host);
return query;
}
char* getLine(int fd)
{
char c = 0, pre = 0;
char* line = 0;
int size = 1;
int pos = 0;
while(read(fd, &c, 1)!=0)
{
if(pos + 1 == size)
{
size *= 2;
line = realloc(line, size);
}
line[pos++] = c;
//printf("%c", c);
if(pre == '\r' && c == '\n')//this is a new line
{
break;
}
pre = c;
}
if(line)
{
line[pos++] = 0;
}
return line;
}
int main(int argc, char** argv)
{
if(argc < 3)
{
perror("Need more arguments");
exit(EXIT_FAILURE);
}
int sockfd = createSokect();
char* ip = getIP(argv[1]);
printf("Host: %s\n", argv[1]);
printf("IP: %s\n", ip);
struct sockaddr_in server;
server.sin_family = AF_INET;
int err = inet_pton(server.sin_family, ip, (void *)(&(server.sin_addr.s_addr)));
if(err != 1)
{
perror("Cannot convert IP to binary address\n");
exit(EXIT_FAILURE);
}
server.sin_port = htons(atoi(argv[2]));
printf("port: %d\n", server.sin_port);
//connect to the server
if(connect(sockfd, (struct sockaddr *)&server, sizeof(server)) < 0)
{
printf("Cannot connect: %d\n", err);
exit(EXIT_FAILURE);
}
char* query = createQuery(argv[1], PAGE);
printf("##### CLIENT IS SENDING THE FOLLOWING TO SERVER:\n");
printf("%s", query);
int offset = 0;
//send query to the server
err = send(sockfd, query + offset, strlen(query) - offset, 0);
if(err < 0)
{
perror("Cannot send query");
exit(EXIT_FAILURE);
}
printf("##### CLIENT RECEIVED THE FOLLOWING FROM SERVER:\n");
//receive message line by line
bool chunked = false;
char* line;
while((line = getLine(sockfd)) != NULL)
{
printf("%s", line);
if(!strcasecmp(line, "transfer-encoding: chunked\r\n"))
{
chunked = true;
//printf("Chunked here\n");
}
if(!strcmp(line, "\r\n"))
{
printf("##### Just read blank line, now reading body.\n");
if(chunked)//chunked, we print those in another way, otherwise line by line
{
free(line);
break;
}
}
free(line);
}
while(chunked)
{
line = getLine(sockfd);
printf("%s", line);
int chunkSize = strtol(line, NULL, 16);
if(chunkSize == 0)
{
printf("##### Read chunk size of 0, reading until we hit end of stream.\n");
break;
}
printf("##### Chunk size (in hex above) is %d in decimal and is printed here:\n", chunkSize);
char* chunkBuf = (char *)malloc(chunkSize + 2 + 1);//2 for \r\n, 1 for \0
bzero(chunkBuf, chunkSize + 3);
if(read(sockfd, chunkBuf, chunkSize + 2) == 0)
{
perror("Read Error: ");
exit(EXIT_FAILURE);
}
printf("%s", chunkBuf);
free(chunkBuf);
}
//receive message from the server
/*
char buf[2048];
bzero(buf, sizeof(buf));
err = recv(sockfd, buf, sizeof(buf), 0);
if(err < 0)
{
perror("Receive error");
exit(EXIT_FAILURE);
}
char *content = buf;
fprintf(stdout, content);*/
free(query);
free(ip);
close(sockfd);
printf("##### Connection closed by server.\n");
exit(EXIT_SUCCESS);
}
最佳答案
行:
if(read(sockfd, chunkBuf, chunkSize + 2) == 0) ...
最多读取 chunkSize+2,即它可以读得更少。请参阅 read 的手册页。您的代码应类似于:
int n = 0;
while (n<chunkSize) {
r = read(sockfd, chunkBuf+n, chunkSize - n);
if (r <= 0) { error or closed conection ... }
n += r;
}
关于c - 解析分块的 HTTP/1.1 响应,我们在Stack Overflow上找到一个类似的问题: https://stackoverflow.com/questions/28359484/
我有一个字符串input="maybe(thisis|thatwas)some((nice|ugly)(day|night)|(strange(weather|time)))"Ruby中解析该字符串的最佳方法是什么?我的意思是脚本应该能够像这样构建句子:maybethisissomeuglynightmaybethatwassomenicenightmaybethiswassomestrangetime等等,你明白了......我应该一个字符一个字符地读取字符串并构建一个带有堆栈的状态机来存储括号值以供以后计算,还是有更好的方法?也许为此目的准备了一个开箱即用的库?
我主要使用Ruby来执行此操作,但到目前为止我的攻击计划如下:使用gemsrdf、rdf-rdfa和rdf-microdata或mida来解析给定任何URI的数据。我认为最好映射到像schema.org这样的统一模式,例如使用这个yaml文件,它试图描述数据词汇表和opengraph到schema.org之间的转换:#SchemaXtoschema.orgconversion#data-vocabularyDV:name:namestreet-address:streetAddressregion:addressRegionlocality:addressLocalityphoto:i
我正在使用ruby1.9解析以下带有MacRoman字符的csv文件#encoding:ISO-8859-1#csv_parse.csvName,main-dialogue"Marceu","Giveittohimóhe,hiswife."我做了以下解析。require'csv'input_string=File.read("../csv_parse.rb").force_encoding("ISO-8859-1").encode("UTF-8")#=>"Name,main-dialogue\r\n\"Marceu\",\"Giveittohim\x97he,hiswife.\"\
是的,我知道最好使用webmock,但我想知道如何在RSpec中模拟此方法:defmethod_to_testurl=URI.parseurireq=Net::HTTP::Post.newurl.pathres=Net::HTTP.start(url.host,url.port)do|http|http.requestreq,foo:1endresend这是RSpec:let(:uri){'http://example.com'}specify'HTTPcall'dohttp=mock:httpNet::HTTP.stub!(:start).and_yieldhttphttp.shou
我是Google云的新手,我正在尝试对其进行首次部署。我的第一个部署是RubyonRails项目。我基本上是在关注thisguideinthegoogleclouddocumentation.唯一的区别是我使用的是我自己的项目,而不是他们提供的“helloworld”项目。这是我的app.yaml文件runtime:customvm:trueentrypoint:bundleexecrackup-p8080-Eproductionconfig.ruresources:cpu:0.5memory_gb:1.3disk_size_gb:10当我转到我的项目目录并运行gcloudprevie
我目前正在使用以下方法获取页面的源代码:Net::HTTP.get(URI.parse(page.url))我还想获取HTTP状态,而无需发出第二个请求。有没有办法用另一种方法做到这一点?我一直在查看文档,但似乎找不到我要找的东西。 最佳答案 在我看来,除非您需要一些真正的低级访问或控制,否则最好使用Ruby的内置Open::URI模块:require'open-uri'io=open('http://www.example.org/')#=>#body=io.read[0,50]#=>"["200","OK"]io.base_ur
简而言之错误:NOTE:Gem::SourceIndex#add_specisdeprecated,useSpecification.add_spec.Itwillberemovedonorafter2011-11-01.Gem::SourceIndex#add_speccalledfrom/opt/local/lib/ruby/site_ruby/1.8/rubygems/source_index.rb:91./opt/local/lib/ruby/gems/1.8/gems/rails-2.3.8/lib/rails/gem_dependency.rb:275:in`==':und
1.错误信息:Errorresponsefromdaemon:Gethttps://registry-1.docker.io/v2/:net/http:requestcanceledwhilewaitingforconnection(Client.Timeoutexceededwhileawaitingheaders)或者:Errorresponsefromdaemon:Gethttps://registry-1.docker.io/v2/:net/http:TLShandshaketimeout2.报错原因:docker使用的镜像网址默认为国外,下载容易超时,需要修改成国内镜像地址(首先阿里
Rails中有没有一种方法可以提取与路由关联的HTTP动词?例如,给定这样的路线:将“users”匹配到:“users#show”,通过:[:get,:post]我能实现这样的目标吗?users_path.respond_to?(:get)(显然#respond_to不是正确的方法)我最接近的是通过执行以下操作,但它似乎并不令人满意。Rails.application.routes.routes.named_routes["users"].constraints[:request_method]#=>/^GET$/对于上下文,我有一个设置cookie然后执行redirect_to:ba
我正在使用ruby2.1.0我有一个json文件。例如:test.json{"item":[{"apple":1},{"banana":2}]}用YAML.load加载这个文件安全吗?YAML.load(File.read('test.json'))我正在尝试加载一个json或yaml格式的文件。 最佳答案 YAML可以加载JSONYAML.load('{"something":"test","other":4}')=>{"something"=>"test","other"=>4}JSON将无法加载YAML。JSON.load("