Add is_bot label to Pyramid request metric

This will make it possible to separate out crawler traffic from real
users.
This commit is contained in:
Deimos
2019-09-10 20:00:40 -06:00
parent 224efb8c77
commit 9ccbc162c4
2 changed files with 21 additions and 1 deletion

View File

@@ -19,6 +19,24 @@ def get_redis_connection(request: Request) -> Redis:
return Redis(unix_socket_path=socket)
def is_bot(request: Request) -> bool:
    """Return whether the request is by a known bot (e.g. search engine crawlers).

    Detection is a case-sensitive substring match of the request's User-Agent
    against a fixed list of known crawler identifiers. A request with a missing
    or empty User-Agent is never treated as a bot.
    """
    bot_user_agent_substrings = (
        "bingbot",
        "Googlebot",
        "qotnews scraper",
        "Qwantify",
        "YandexBot",
    )

    user_agent = request.user_agent
    if not user_agent:
        return False

    # A generator (rather than a list) lets any() stop at the first match.
    return any(substring in user_agent for substring in bot_user_agent_substrings)
def is_safe_request_method(request: Request) -> bool:
    """Return True when the request uses a "safe" HTTP method (GET or HEAD)."""
    safe_methods = ("GET", "HEAD")
    return request.method in safe_methods
@@ -124,6 +142,7 @@ def current_listing_normal_url(
def includeme(config: Configurator) -> None:
"""Attach the request methods to the Pyramid request object."""
config.add_request_method(is_bot, "is_bot", reify=True)
config.add_request_method(is_safe_request_method, "is_safe_method", reify=True)
# Add the request.redis request method to access a redis connection. This is done in

View File

@@ -42,7 +42,7 @@ def metrics_tween_factory(handler: Callable, registry: Registry) -> Callable:
request_histogram = Histogram(
"tildes_pyramid_requests_seconds",
"Request processing times",
labelnames=["route", "status_code", "method", "logged_in"],
labelnames=["route", "status_code", "method", "logged_in", "is_bot"],
)
def metrics_tween(request: Request) -> Response:
@@ -60,6 +60,7 @@ def metrics_tween_factory(handler: Callable, registry: Registry) -> Callable:
status_code=response.status_code,
method=request.method,
logged_in=str(bool(request.user)).lower(),
is_bot=str(request.is_bot).lower(),
).observe(duration)
return response