From 0431f38a4eeb0b88a8d31fa8e858f3e4598aa569 Mon Sep 17 00:00:00 2001 From: NitroMelon Date: Thu, 28 Oct 2021 21:53:58 +0800 Subject: [PATCH] Support range for static file (#1060) --- CMakeLists.txt | 1 + lib/src/RangeParser.cc | 177 ++++++++++++++++ lib/src/RangeParser.h | 40 ++++ lib/src/StaticFileRouter.cc | 237 +++++++++++++++------- lib/src/StaticFileRouter.h | 1 + lib/tests/integration_test/client/main.cc | 49 ++++- lib/tests/integration_test/server/main.cc | 2 +- 7 files changed, 435 insertions(+), 72 deletions(-) create mode 100644 lib/src/RangeParser.cc create mode 100644 lib/src/RangeParser.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 3f929a4f..7b4177e6 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -263,6 +263,7 @@ set(DROGON_SOURCES lib/src/MultiPart.cc lib/src/NotFound.cc lib/src/PluginsManager.cc + lib/src/RangeParser.cc lib/src/SecureSSLRedirector.cc lib/src/AccessLogger.cc lib/src/SessionManager.cc diff --git a/lib/src/RangeParser.cc b/lib/src/RangeParser.cc new file mode 100644 index 00000000..12bb32e9 --- /dev/null +++ b/lib/src/RangeParser.cc @@ -0,0 +1,177 @@ +/** + * + * RangeParser.h + * He, Wanchen + * + * Copyright 2021, He,Wanchen. All rights reserved. + * https://github.com/drogonframework/drogon + * Use of this source code is governed by a MIT license + * that can be found in the License file. + * + * Drogon + * + */ + +#include "RangeParser.h" + +#include + +using namespace drogon; + +static constexpr size_t MAX_SIZE = std::numeric_limits::max(); +static constexpr size_t MAX_TEN = MAX_SIZE / 10; +static constexpr size_t MAX_DIGIT = MAX_SIZE % 10; + +// clang-format off +#define DR_SKIP_WHITESPACE(p) while (*p == ' ') { ++(p); } +#define DR_ISDIGIT(p) ('0' <= *(p) && *(p) <= '9') +#define DR_WOULD_OVERFLOW(base, digit) \ + ((base) > MAX_TEN || (base >= MAX_TEN && (digit) - '0' > MAX_DIGIT)) +// clang-format on + +/** Following formats are valid range header according to rfc7233` + * Range: =- + * Range: =- + * Range: =-, - + * Range: =-, -, - + * Range: =- + */ + +FileRangeParseResult drogon::parseRangeHeader(const std::string &rangeStr, + size_t contentLength, + std::vector &ranges) +{ + if (rangeStr.size() < 7 || rangeStr.compare(0, 6, "bytes=") != 0) + { + return InvalidRange; + } + const char *iter = rangeStr.c_str() + 6; + + size_t totalSize = 0; + while (true) + { + size_t start = 0; + size_t end = 0; + // If this is a suffix range: =- + bool isSuffix = false; + + DR_SKIP_WHITESPACE(iter); + + if (*iter == '-') + { + isSuffix = true; + ++iter; + } + // Parse start + else + { + if (!DR_ISDIGIT(iter)) + { + return InvalidRange; + } + while (DR_ISDIGIT(iter)) + { + // integer out of range + if (DR_WOULD_OVERFLOW(start, *iter)) + { + return NotSatisfiable; + } + start = start * 10 + (*iter++ - '0'); + } + DR_SKIP_WHITESPACE(iter); + // should be separator now + if (*iter++ != '-') + { + return InvalidRange; + } + DR_SKIP_WHITESPACE(iter); + // If this is a prefix range =- + if (*iter == ',' || *iter == '\0') + { + end = contentLength; + // Handle found + if (start < end) + { + if (totalSize > MAX_SIZE - (end - start)) + { + return NotSatisfiable; + } + totalSize += end - start; + ranges.push_back({start, end}); + } + if (*iter++ != ',') + { + break; + } + continue; + } + } + + // Parse end + if (!DR_ISDIGIT(iter)) + { + return InvalidRange; + } + while (DR_ISDIGIT(iter)) + { + if (DR_WOULD_OVERFLOW(end, *iter)) + { + return NotSatisfiable; + } + end = end * 10 + (*iter++ - '0'); + } + DR_SKIP_WHITESPACE(iter); + + if (*iter != ',' && *iter != '\0') + { + return InvalidRange; + } + if (isSuffix) + { + start = (end < contentLength) ? contentLength - end : 0; + end = contentLength - 1; + } + // [start, end) + if (end >= contentLength) + { + end = contentLength; + } + else + { + ++end; + } + + // handle found + if (start < end) + { + ranges.push_back({start, end}); + if (totalSize > MAX_SIZE - (end - start)) + { + return NotSatisfiable; + } + totalSize += end - start; + // We restrict the number to be under 100, to avoid malicious + // requests. + // Though rfc does not say anything about max number of ranges, + // it does mention that server can ignore range header freely. + if (ranges.size() > 100) + { + return InvalidRange; + } + } + if (*iter++ != ',') + { + break; + } + } + + if (ranges.size() == 0 || totalSize > contentLength) + { + return NotSatisfiable; + } + return ranges.size() == 1 ? SinglePart : MultiPart; +} + +#undef DR_SKIP_WHITESPACE +#undef DR_ISDIGIT +#undef DR_WOULD_OVERFLOW diff --git a/lib/src/RangeParser.h b/lib/src/RangeParser.h new file mode 100644 index 00000000..f0dcd55e --- /dev/null +++ b/lib/src/RangeParser.h @@ -0,0 +1,40 @@ +/** + * + * RangeParser.h + * He, Wanchen + * + * Copyright 2021, He,Wanchen. All rights reserved. + * https://github.com/drogonframework/drogon + * Use of this source code is governed by a MIT license + * that can be found in the License file. + * + * Drogon + * + */ + +#include +#include +#include + +namespace drogon +{ +// [start, end) +struct FileRange +{ + size_t start; + size_t end; +}; + +enum FileRangeParseResult +{ + InvalidRange = -1, + NotSatisfiable = 0, + SinglePart = 1, + MultiPart = 2 +}; + +FileRangeParseResult parseRangeHeader(const std::string &rangeStr, + size_t contentLength, + std::vector &ranges); + +} // namespace drogon diff --git a/lib/src/StaticFileRouter.cc b/lib/src/StaticFileRouter.cc index bb5f16f3..ae6597c5 100644 --- a/lib/src/StaticFileRouter.cc +++ b/lib/src/StaticFileRouter.cc @@ -16,6 +16,7 @@ #include "HttpAppFrameworkImpl.h" #include "HttpRequestImpl.h" #include "HttpResponseImpl.h" +#include "RangeParser.h" #include #include #include @@ -259,12 +260,153 @@ void StaticFileRouter::route( defaultHandler_(req, std::move(callback)); } +// Expand this struct as you need, nothing to worry about +struct FileStat +{ + size_t fileSize_; + struct tm modifiedTime_; + std::string modifiedTimeStr_; +}; + +// A wrapper to call stat() +// std::filesystem::file_time_type::clock::to_time_t still not +// implemented by M$, even in c++20, so keep calls to stat() +static bool getFileStat(const std::string &filePath, FileStat &myStat) +{ +#if defined(_WIN32) && !defined(__MINGW32__) + struct _stati64 fileStat; +#else // _WIN32 + struct stat fileStat; +#endif // _WIN32 + if (stat(utils::toNativePath(filePath).c_str(), &fileStat) == 0 && + S_ISREG(fileStat.st_mode)) + { + LOG_TRACE << "last modify time:" << fileStat.st_mtime; +#ifdef _WIN32 + gmtime_s(&myStat.modifiedTime_, &fileStat.st_mtime); +#else + gmtime_r(&fileStat.st_mtime, &myStat.modifiedTime_); +#endif + std::string &timeStr = myStat.modifiedTimeStr_; + timeStr.resize(64); + size_t len = strftime(timeStr.data(), + timeStr.size(), + "%a, %d %b %Y %H:%M:%S GMT", + &myStat.modifiedTime_); + timeStr.resize(len); + + myStat.fileSize_ = fileStat.st_size; + return true; + } + + return false; +} + void StaticFileRouter::sendStaticFileResponse( const std::string &filePath, const HttpRequestImplPtr &req, std::function &&callback, const string_view &defaultContentType) -{ // find cached response +{ + if (req->method() != Get) + { + callback(app().getCustomErrorHandler()(k405MethodNotAllowed)); + return; + } + + FileStat fileStat; + bool fileExists = false; + const std::string &rangeStr = req->getHeaderBy("range"); + if (enableRange_ && !rangeStr.empty()) + { + if (!getFileStat(filePath, fileStat)) + { + defaultHandler_(req, std::move(callback)); + return; + } + fileExists = true; + // Check last modified time, rfc2616-14.25 + // If-Modified-Since: Mon, 15 Oct 2018 06:26:33 GMT + // According to rfc 7233-3.1, preconditions must be evaluated before + const std::string &modiStr = req->getHeaderBy("if-modified-since"); + if (enableLastModify_ && modiStr == fileStat.modifiedTimeStr_) + { + LOG_TRACE << "Not modified!"; + std::shared_ptr resp = + std::make_shared(); + resp->setStatusCode(k304NotModified); + resp->setContentTypeCode(CT_NONE); + HttpAppFrameworkImpl::instance().callCallback(req, resp, callback); + return; + } + // Check If-Range precondition + const std::string &ifRange = req->getHeaderBy("if-range"); + if (ifRange.empty() || ifRange == fileStat.modifiedTimeStr_) + { + std::vector ranges; + switch (parseRangeHeader(rangeStr, fileStat.fileSize_, ranges)) + { + // TODO: support only single range now + // Contributions are welcomed. + case FileRangeParseResult::SinglePart: + case FileRangeParseResult::MultiPart: + { + auto firstRange = ranges.front(); + auto ct = fileNameToContentTypeAndMime(filePath); + auto resp = + HttpResponse::newFileResponse(filePath, + firstRange.start, + firstRange.end - + firstRange.start, + true, + "", + ct.first, + std::string(ct.second)); + if (!fileStat.modifiedTimeStr_.empty()) + { + resp->addHeader("Last-Modified", + fileStat.modifiedTimeStr_); + resp->addHeader("Expires", + "Thu, 01 Jan 1970 00:00:00 GMT"); + } + HttpAppFrameworkImpl::instance().callCallback(req, + resp, + callback); + return; + } + case FileRangeParseResult::NotSatisfiable: + { + auto resp = HttpResponse::newHttpResponse(); + resp->setStatusCode(k416RequestedRangeNotSatisfiable); + char buf[64]; + snprintf(buf, + sizeof(buf), + "bytes */%zu", + fileStat.fileSize_); + resp->addHeader("Content-Range", std::string(buf)); + HttpAppFrameworkImpl::instance().callCallback(req, + resp, + callback); + return; + } + /** rfc7233 4.4. + * > Note: Because servers are free to ignore Range, many + * implementations will simply respond with the entire selected + * representation in a 200 (OK) response. That is partly + * because most clients are prepared to receive a 200 (OK) to + * complete the task (albeit less efficiently) and partly + * because clients might not stop making an invalid partial + * request until they have received a complete representation. + * Thus, clients cannot depend on receiving a 416 (Range Not + * Satisfiable) response even when it is most appropriate. + */ + default: + break; + } + } + } + + // find cached response HttpResponsePtr cachedResp; auto &cacheMap = staticFilesCache_->getThreadData(); auto iter = cacheMap.find(filePath); @@ -273,20 +415,10 @@ void StaticFileRouter::sendStaticFileResponse( cachedResp = iter->second; } - // check last modified time,rfc2616-14.25 - // If-Modified-Since: Mon, 15 Oct 2018 06:26:33 GMT - - std::string timeStr; - bool fileExists{false}; if (enableLastModify_) { if (cachedResp) { - if (req->method() != Get) - { - callback(app().getCustomErrorHandler()(k405MethodNotAllowed)); - return; - } if (static_cast(cachedResp.get()) ->getHeaderBy("last-modified") == req->getHeaderBy("if-modified-since")) @@ -304,71 +436,36 @@ void StaticFileRouter::sendStaticFileResponse( else { LOG_TRACE << "enabled LastModify"; - // std::filesystem::file_time_type::clock::to_time_t still not - // implemented by M$, even in c++20, so keep calls to stat() -#if defined(_WIN32) && !defined(__MINGW32__) - struct _stati64 fileStat; -#else // _WIN32 - struct stat fileStat; -#endif // _WIN32 - if (stat(utils::toNativePath(filePath).c_str(), &fileStat) == 0 && - S_ISREG(fileStat.st_mode)) - { - fileExists = true; - LOG_TRACE << "last modify time:" << fileStat.st_mtime; - if (req->method() != Get) - { - callback( - app().getCustomErrorHandler()(k405MethodNotAllowed)); - return; - } - struct tm tm1; -#ifdef _WIN32 - gmtime_s(&tm1, &fileStat.st_mtime); -#else - gmtime_r(&fileStat.st_mtime, &tm1); -#endif - timeStr.resize(64); - auto len = strftime((char *)timeStr.data(), - timeStr.size(), - "%a, %d %b %Y %H:%M:%S GMT", - &tm1); - timeStr.resize(len); - const std::string &modiStr = - req->getHeaderBy("if-modified-since"); - if (modiStr == timeStr && !modiStr.empty()) - { - LOG_TRACE << "not Modified!"; - std::shared_ptr resp = - std::make_shared(); - resp->setStatusCode(k304NotModified); - resp->setContentTypeCode(CT_NONE); - HttpAppFrameworkImpl::instance().callCallback(req, - resp, - callback); - return; - } - } - else + if (!fileExists && !getFileStat(filePath, fileStat)) { defaultHandler_(req, std::move(callback)); return; } + fileExists = true; + const std::string &modiStr = req->getHeaderBy("if-modified-since"); + if (modiStr == fileStat.modifiedTimeStr_) + { + LOG_TRACE << "not Modified!"; + std::shared_ptr resp = + std::make_shared(); + resp->setStatusCode(k304NotModified); + resp->setContentTypeCode(CT_NONE); + HttpAppFrameworkImpl::instance().callCallback(req, + resp, + callback); + return; + } } } if (cachedResp) { - if (req->method() != Get) - { - callback(app().getCustomErrorHandler()(k405MethodNotAllowed)); - return; - } LOG_TRACE << "Using file cache"; HttpAppFrameworkImpl::instance().callCallback(req, cachedResp, callback); return; } + // Check existence if (!fileExists) { filesystem::path fsFilePath(utils::toNativePath(filePath)); @@ -381,12 +478,6 @@ void StaticFileRouter::sendStaticFileResponse( } } - if (req->method() != Get) - { - callback(app().getCustomErrorHandler()(k405MethodNotAllowed)); - return; - } - HttpResponsePtr resp; auto &acceptEncoding = req->getHeaderBy("accept-encoding"); @@ -441,11 +532,15 @@ void StaticFileRouter::sendStaticFileResponse( resp->setContentTypeCodeAndCustomString(CT_CUSTOM, defaultContentType); } - if (!timeStr.empty()) + if (!fileStat.modifiedTimeStr_.empty()) { - resp->addHeader("Last-Modified", timeStr); + resp->addHeader("Last-Modified", fileStat.modifiedTimeStr_); resp->addHeader("Expires", "Thu, 01 Jan 1970 00:00:00 GMT"); } + if (enableRange_) + { + resp->addHeader("accept-range", "bytes"); + } if (!headers_.empty()) { for (auto &header : headers_) @@ -474,6 +569,7 @@ void StaticFileRouter::sendStaticFileResponse( callback(resp); return; } + void StaticFileRouter::setFileTypes(const std::vector &types) { fileTypeSet_.clear(); @@ -482,6 +578,7 @@ void StaticFileRouter::setFileTypes(const std::vector &types) fileTypeSet_.insert(type); } } + void StaticFileRouter::defaultHandler( const HttpRequestPtr & /*req*/, std::function &&callback) diff --git a/lib/src/StaticFileRouter.h b/lib/src/StaticFileRouter.h index f0f8e98d..cce08950 100644 --- a/lib/src/StaticFileRouter.h +++ b/lib/src/StaticFileRouter.h @@ -125,6 +125,7 @@ class StaticFileRouter int staticFilesCacheTime_{5}; bool enableLastModify_{true}; + bool enableRange_{true}; bool gzipStaticFlag_{true}; bool brStaticFlag_{true}; std::unique_ptr< diff --git a/lib/tests/integration_test/client/main.cc b/lib/tests/integration_test/client/main.cc index 471d7545..2aadd0fa 100644 --- a/lib/tests/integration_test/client/main.cc +++ b/lib/tests/integration_test/client/main.cc @@ -712,7 +712,7 @@ void doTest(const HttpClientPtr &client, std::shared_ptr TEST_CTX) CHECK((*json)["P2"] == "test"); }); - // Test send file by range + // Test newFileResponse req = HttpRequest::newHttpRequest(); req->setPath("/RangeTestController/"); client->sendRequest( @@ -766,6 +766,53 @@ void doTest(const HttpClientPtr &client, std::shared_ptr TEST_CTX) CHECK(resp->getStatusCode() == k416RequestedRangeNotSatisfiable); }); + // + // Test StaticFileRouter with range header + // + req = HttpRequest::newHttpRequest(); + req->setPath("/range-test.txt"); + req->setMethod(drogon::Head); + client->sendRequest( + req, [req, TEST_CTX](ReqResult result, const HttpResponsePtr &resp) { + REQUIRE(result == ReqResult::Ok); + CHECK(resp->getStatusCode() == k200OK); + CHECK(resp->getHeader("content-length") == "1000000"); + CHECK(resp->getHeader("accept-range") == "bytes"); + }); + + req = HttpRequest::newHttpRequest(); + req->setPath("/range-test.txt"); + req->addHeader("range", "bytes=0-19"); + client->sendRequest(req, + [req, TEST_CTX](ReqResult result, + const HttpResponsePtr &resp) { + REQUIRE(result == ReqResult::Ok); + CHECK(resp->getStatusCode() == k206PartialContent); + CHECK(resp->getBody() == "01234567890123456789"); + }); + + req = HttpRequest::newHttpRequest(); + req->setPath("/range-test.txt"); + req->addHeader("range", "bytes=-20"); + client->sendRequest(req, + [req, TEST_CTX](ReqResult result, + const HttpResponsePtr &resp) { + REQUIRE(result == ReqResult::Ok); + CHECK(resp->getStatusCode() == k206PartialContent); + CHECK(resp->getBody() == "01234567890123456789"); + }); + + req = HttpRequest::newHttpRequest(); + req->setPath("/range-test.txt"); + req->addHeader("range", "bytes=999980-"); + client->sendRequest(req, + [req, TEST_CTX](ReqResult result, + const HttpResponsePtr &resp) { + REQUIRE(result == ReqResult::Ok); + CHECK(resp->getStatusCode() == k206PartialContent); + CHECK(resp->getBody() == "01234567890123456789"); + }); + // Using .. to access a upper directory should be permitted as long as // it never leaves the document root req = HttpRequest::newHttpRequest(); diff --git a/lib/tests/integration_test/server/main.cc b/lib/tests/integration_test/server/main.cc index 573eb952..e6dac46c 100644 --- a/lib/tests/integration_test/server/main.cc +++ b/lib/tests/integration_test/server/main.cc @@ -362,7 +362,7 @@ int main() } }); app().registerCustomExtensionMime("md", "text/markdown"); - app().setFileTypes({"md", "html", "jpg", "cc"}); + app().setFileTypes({"md", "html", "jpg", "cc", "txt"}); std::cout << "Date: " << std::string{drogon::utils::getHttpFullDate( trantor::Date::now())}