Skip to content

Add support for subproject and per-version sitemaps, and styled main sitemap #12249

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
98 changes: 98 additions & 0 deletions readthedocs/core/static/core/xsl/sitemap.xslt
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Browser-side stylesheet that renders a sitemap as a human readable HTML page.

  It handles both sitemap document types from the sitemaps.org 0.9 schema:

  - ``urlset``: every ``url`` entry is listed with the label "URL"
  - ``sitemapindex``: every ``sitemap`` entry is listed with the label "Sitemap"

  Only XSLT 1.0 features are used, so the stylesheet declares version 1.0.
  ``exclude-result-prefixes`` keeps the ``sitemap`` namespace declaration out
  of the generated HTML.
-->
<xsl:stylesheet
  version="1.0"
  xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
  xmlns:sitemap="http://www.sitemaps.org/schemas/sitemap/0.9"
  exclude-result-prefixes="sitemap"
>
  <xsl:output method="html" indent="yes" encoding="UTF-8" />

  <!-- Page skeleton: intro text, list of links, and pointers to the docs. -->
  <xsl:template match="/">
    <html>
      <head>
        <title>Documentation sitemap</title>
        <meta name="viewport" content="width=device-width, initial-scale=1" />
        <style>
          main p,
          main h1,
          main h2 {
            max-width: 50rem;
          }
        </style>
      </head>
      <body>
        <main>
          <h1>Documentation sitemap</h1>

          <p>
            This sitemap is autogenerated by Read the Docs. For projects with
            subprojects, this sitemap links to the sitemap for each individual
            subproject. For projects without subprojects, this sitemap links to
            the project's public versions, sorted by version number.
          </p>

          <h2>Links</h2>

          <ul>
            <!-- Only one of these selects anything: a document is either a
                 urlset or a sitemapindex, never both. -->
            <xsl:apply-templates select="sitemap:urlset/sitemap:url" mode="link">
              <xsl:with-param name="label" select="'URL'" />
            </xsl:apply-templates>
            <xsl:apply-templates select="sitemap:sitemapindex/sitemap:sitemap" mode="link">
              <xsl:with-param name="label" select="'Sitemap'" />
            </xsl:apply-templates>
          </ul>

          <h2>Learn more</h2>

          <dl>
            <dt>
              <a
                href="https://docs.readthedocs.com/platform/stable/reference/sitemaps.html"
                >Sitemap documentation</a
              >
            </dt>
            <dd>
              How sitemaps are generated and how to use and customize them for
              your projects.
            </dd>
            <dt>
              <a
                href="https://docs.readthedocs.com/platform/stable/guides/technical-docs-seo-guide.html"
                >Our guide about SEO techniques</a
              >
            </dt>
            <dd>
              How sitemaps affect SEO and best practices to indexing your
              documentation content.
            </dd>
          </dl>
        </main>
      </body>
    </html>
  </xsl:template>

  <!--
    Render one entry as a list item: "<label>: <link to loc> (<lastmod>)".
    The parenthesised date is omitted when the entry has no non-empty
    ``lastmod`` child.
  -->
  <xsl:template match="sitemap:url | sitemap:sitemap" mode="link">
    <xsl:param name="label" />
    <li>
      <xsl:value-of select="$label" />
      <xsl:text>: </xsl:text>
      <a href="{sitemap:loc}"><xsl:value-of select="sitemap:loc" /></a>
      <xsl:if test="sitemap:lastmod != ''">
        <xsl:text> (</xsl:text>
        <xsl:value-of select="sitemap:lastmod" />
        <xsl:text>)</xsl:text>
      </xsl:if>
    </li>
  </xsl:template>
</xsl:stylesheet>
102 changes: 90 additions & 12 deletions readthedocs/proxito/views/serve.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,13 @@
from readthedocs.core.unresolver import unresolver
from readthedocs.core.utils.extend import SettingsOverrideObject
from readthedocs.core.utils.requests import is_suspicious_request
from readthedocs.core.utils.url import unsafe_join_url_path
from readthedocs.projects.constants import OLD_LANGUAGES_CODE_MAPPING
from readthedocs.projects.constants import PRIVATE
from readthedocs.projects.models import Domain
from readthedocs.projects.models import Feature
from readthedocs.projects.models import HTMLFile
from readthedocs.projects.models import Project
from readthedocs.projects.templatetags.projects_tags import sort_version_aware
from readthedocs.proxito.constants import RedirectType
from readthedocs.proxito.exceptions import ContextualizedHttp404
Expand Down Expand Up @@ -780,7 +782,7 @@ class ServeRobotsTXT(SettingsOverrideObject):


class ServeSitemapXMLBase(CDNCacheControlMixin, CDNCacheTagsMixin, View):
"""Serve sitemap.xml from the domain's root."""
"""Serve a sitemap from the project root."""

# Always cache this view, since it's the same for all users.
cache_response = True
Expand All @@ -789,7 +791,90 @@ class ServeSitemapXMLBase(CDNCacheControlMixin, CDNCacheTagsMixin, View):

def get(self, request):
    """
    Render the project's sitemap as an XML response.

    The kind of sitemap depends on the query string:

    - With a ``versions`` parameter, the context carries the result of
      :py:meth:`get_urlset` under ``versions``.
    - Without it, the context carries the result of
      :py:meth:`get_sitemapindex` under ``sitemaps``.

    Both cases are rendered through the ``sitemap.xml`` template.
    """
    if "versions" in request.GET:
        context = {"versions": self.get_urlset(request)}
    else:
        context = {"sitemaps": self.get_sitemapindex(request)}

    return render(request, "sitemap.xml", context, content_type="application/xml")

def get_sitemapindex(self, request):
    """
    Build the entries of a sitemapindex pointing to other sitemaps.

    The returned list contains, in this order:

    - The project's urlset sitemap listing all versions (the current URL
      with a ``versions`` query string), see :py:meth:`get_urlset`
    - The ``sitemap.xml`` of each public, active version of the project
    - The ``sitemap.xml`` of each public, active version of each public
      subproject

    :param request: current request; the project is read from
        ``request.unresolved_domain.project``.
    :returns: list of dicts, each with a ``loc`` key and, when the version
        has at least one build, a ``lastmod`` key holding an ISO 8601 date.
    """

    def get_public_versions(project, resolver=None):
        """Yield one sitemap entry per public, active version of ``project``."""
        if resolver is None:
            resolver = Resolver()
        public_versions = Version.internal.public(
            project=project,
            only_active=True,
            include_hidden=False,
        )
        for version in public_versions:
            prefix = resolver.resolve(
                project=project,
                version_slug=version.slug,
            )
            parsed_prefix = urlparse(prefix)
            sitemap_url = parsed_prefix._replace(
                path=unsafe_join_url_path(parsed_prefix.path, "sitemap.xml"),
            ).geturl()

            sitemap = {
                "loc": sitemap_url,
            }
            # Version can be enabled, but not ``built`` yet. We want to show the
            # link without a ``lastmod`` attribute
            last_build = version.builds.order_by("-date").first()
            if last_build:
                sitemap["lastmod"] = last_build.date.isoformat()

            yield sitemap

    # TODO is it important to share this between lookups? I saw other
    # attempts to share this, I'm guessing to decrease lookup latency?
    resolver = Resolver()

    project = request.unresolved_domain.project
    public_subprojects = Project.objects.public().filter(superprojects__parent=project)

    # Version urlset list first. This is useful if the project doesn't
    # output a sitemap per version.
    #
    # ``build_absolute_uri()`` includes the query string of the current
    # request, so the query is replaced instead of appending "?versions",
    # which would produce a second "?" when any parameter is present.
    current_url = urlparse(request.build_absolute_uri())
    sitemaps = [
        {
            "loc": current_url._replace(query="versions").geturl(),
        }
    ]

    # Links to subproject and project versioned sitemaps
    sitemaps.extend(get_public_versions(project, resolver=resolver))
    for subproject in public_subprojects:
        sitemaps.extend(get_public_versions(subproject, resolver=resolver))

    return sitemaps

def get_urlset(self, request):
"""
Generate and serve a urlset sitemap for all public project versions.

The sitemap is generated from all the ``active`` and public versions of
``project``. These versions are sorted by using semantic versioning
Expand Down Expand Up @@ -846,11 +931,13 @@ def changefreqs_generator():
yield from itertools.chain(changefreqs, itertools.repeat("monthly"))

project = request.unresolved_domain.project

public_versions = Version.internal.public(
project=project,
only_active=True,
include_hidden=False,
)

if not public_versions.exists():
raise Http404()

Expand Down Expand Up @@ -919,16 +1006,7 @@ def changefreqs_generator():
)

versions.append(element)

context = {
"versions": versions,
}
return render(
request,
"sitemap.xml",
context,
content_type="application/xml",
)
return versions

def _get_project(self):
# Method used by the CDNCacheTagsMixin class.
Expand Down
71 changes: 45 additions & 26 deletions readthedocs/templates/sitemap.xml
Original file line number Diff line number Diff line change
@@ -1,30 +1,49 @@
<?xml version="1.0" encoding="UTF-8"?>
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
xmlns:xhtml="http://www.w3.org/1999/xhtml">
<!--
This sitemap is autogenerated by Read the Docs.
It contains all public versions,
sorted by version number.

You can learn more about sitemaps, including how to customize them, in our documentation:
{# First byte needs to be opening tag in XML #}
{% load static from static %}

* Our documentation on Sitemaps: https://docs.readthedocs.com/platform/stable/reference/sitemaps.html
* Our guide about SEO techniques: https://docs.readthedocs.com/platform/stable/guides/technical-docs-seo-guide.html
-->
{% for version in versions %}
<url>
<loc>{{ version.loc }}</loc>
{% for language in version.languages %}
<xhtml:link
rel="alternate"
hreflang="{{ language.hreflang }}"
href="{{ language.href }}"/>
{# Use template for human readable version and informational content #}
<?xml-stylesheet type="text/xml" href="{% static "/core/xsl/sitemap.xslt" %}"?>

{% comment %}
Sitemaps don't have a shared top level entity so we can only show one of these at a time:

- A ``sitemapindex`` pointing to other sitemaps
- A ``urlset`` pointing to URLs for project public versions

The view handles how to show one list or the other.
{% endcomment %}

{% if sitemaps %}
<sitemapindex xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
{% for sitemap in sitemaps %}
<sitemap>
<loc>{{ sitemap.loc }}</loc>
{% if sitemap.lastmod %}
<lastmod>{{ sitemap.lastmod }}</lastmod>
{% endif %}
</sitemap>
{% endfor %}
</sitemapindex>
{% elif versions %}
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
xmlns:xhtml="http://www.w3.org/1999/xhtml">
{% for version in versions %}
<url>
<loc>{{ version.loc }}</loc>
{% for language in version.languages %}
<xhtml:link
rel="alternate"
hreflang="{{ language.hreflang }}"
href="{{ language.href }}"/>
{% endfor %}
{% if version.lastmod %}
<lastmod>{{ version.lastmod }}</lastmod>
{% endif %}
<changefreq>{{ version.changefreq }}</changefreq>
<priority>{{ version.priority }}</priority>
</url>
{% endfor %}
{% if version.lastmod %}
<lastmod>{{ version.lastmod }}</lastmod>
{% endif %}
<changefreq>{{ version.changefreq }}</changefreq>
<priority>{{ version.priority }}</priority>
</url>
{% endfor %}
</urlset>
</urlset>
{% endif %}