Better server files like robots, 404, etc.
This commit is contained in:
parent
4ce61f8b40
commit
528b0ae80e
59
Rules
59
Rules
|
@ -1,17 +1,28 @@
|
|||
#!/usr/bin/env ruby
|
||||
|
||||
BYPASS_FILES = %w(404.html crossdomain.xml humans.txt robots.txt) unless defined?(BYPASS_FILES)
|
||||
|
||||
BYPASS_FILES.each do |file|
|
||||
compile("/#{file.sub /\..+/, ''}/") do
|
||||
# don't filter bypass files
|
||||
preprocess do
|
||||
create_robots_txt
|
||||
create_webmaster_tools_authentications
|
||||
create_sitemap
|
||||
end
|
||||
|
||||
compile %r{^/(google|robots|assets)} do
|
||||
end
|
||||
|
||||
compile %r{/_.+/$} do
|
||||
# don't filter partials
|
||||
end
|
||||
|
||||
# Sitemap and htaccess do get filtered with erb, but get no layout.
|
||||
compile %r{^/(sitemap|htaccess)/$} do
|
||||
filter :erb
|
||||
end
|
||||
|
||||
compile '/sitemap/', :rep => 'gzip' do
|
||||
filter :erb
|
||||
filter :shellcmd, :cmd => 'gzip'
|
||||
end
|
||||
|
||||
compile '/css/*/' do
|
||||
# filter :sass, syntax: :scss, load_paths: SASS_LOAD_PATHS
|
||||
filter :sass, Compass.sass_engine_options
|
||||
|
@ -36,6 +47,13 @@ compile '/posts/*' do
|
|||
filter :cache_buster
|
||||
end
|
||||
|
||||
compile %r{^/(404)/$} do
|
||||
filter :haml, format: :html5, ugly: true
|
||||
|
||||
layout 'default'
|
||||
filter :cache_buster
|
||||
end
|
||||
|
||||
compile '*' do
|
||||
unless item.binary?
|
||||
case item[:extension]
|
||||
|
@ -55,16 +73,19 @@ compile '*' do
|
|||
end
|
||||
end
|
||||
|
||||
BYPASS_FILES.each do |file|
|
||||
route("/#{file.sub /\..+/, ''}/") do
|
||||
"/#{file}" # route bypass files as is
|
||||
end
|
||||
end
|
||||
|
||||
route %r{/_.+/$} do
|
||||
nil # don't route partials
|
||||
end
|
||||
|
||||
route %r{^/(assets/.*|sitemap|robots|atom)/$} do
|
||||
ext = item[:extension]
|
||||
ext = 'js' if ext == 'coffee'
|
||||
ext = 'css' if ext == 'scss'
|
||||
|
||||
fp = cachebust?(item) ? fingerprint(item[:filename]) : ''
|
||||
item.identifier.chop + fp + '.' + ext
|
||||
end
|
||||
|
||||
route '/css/*/' do
|
||||
fp = fingerprint(item[:filename])
|
||||
item.identifier.chop + fp + '.css'
|
||||
|
@ -79,6 +100,22 @@ route '/rss/' do
|
|||
'/rss.xml'
|
||||
end
|
||||
|
||||
route '/htaccess/' do
|
||||
'/.htaccess'
|
||||
end
|
||||
|
||||
route '/sitemap/', :rep => 'gzip' do
|
||||
'/sitemap.xml.gz'
|
||||
end
|
||||
|
||||
route '/sitemap/' do
|
||||
'/sitemap.xml'
|
||||
end
|
||||
|
||||
route %r{^/(404)/$} do
|
||||
item.identifier.chop + '.html'
|
||||
end
|
||||
|
||||
route '/posts/*' do
|
||||
y, m, d, slug = /([0-9]+)\-([0-9]+)\-([0-9]+)\-([^\/]+)/.match(item.identifier).captures
|
||||
"/#{y}/#{m}/#{d}/#{slug}/index.html"
|
||||
|
|
126
content/htaccess.txt
Normal file
126
content/htaccess.txt
Normal file
|
@ -0,0 +1,126 @@
|
|||
# ----------------------------------------------------------------------
|
||||
# Start rewrite engine
|
||||
# ----------------------------------------------------------------------
|
||||
|
||||
# Turning on the rewrite engine is necessary for the following rules and features.
|
||||
|
||||
<IfModule mod_rewrite.c>
|
||||
RewriteEngine On
|
||||
</IfModule>
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# Suppress or force the "www." at the beginning of URLs
|
||||
# ----------------------------------------------------------------------
|
||||
|
||||
# The same content should never be available under two different URLs - especially not with and
|
||||
# without "www." at the beginning, since this can cause SEO problems (duplicate content).
|
||||
# That's why you should choose one of the alternatives and redirect the other one.
|
||||
|
||||
# By default option 1 (no "www.") is activated. Remember: Shorter URLs are sexier.
|
||||
# no-www.org/faq.php?q=class_b
|
||||
|
||||
# If you rather want to use option 2, just comment out all option 1 lines
|
||||
# and uncomment option 2.
|
||||
# IMPORTANT: NEVER USE BOTH RULES AT THE SAME TIME!
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
|
||||
<IfModule mod_rewrite.c>
|
||||
RewriteCond %{HTTPS} !=on
|
||||
RewriteCond %{HTTP_HOST} ^www\.(.+)$ [NC]
|
||||
RewriteRule ^(.*)$ http://%1/$1 [R=301,L]
|
||||
</IfModule>
|
||||
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# Add/remove trailing slash to (non-file) URLs
|
||||
# ----------------------------------------------------------------------
|
||||
|
||||
# Google treats URLs with and without trailing slashes separately.
|
||||
# Forcing a trailing slash is usually preferred, but all that's really
|
||||
# important is that one correctly redirects to the other.
|
||||
|
||||
# By default option 1 (force trailing slash) is activated.
|
||||
# http://googlewebmastercentral.blogspot.com/2010/04/to-slash-or-not-to-slash.html
|
||||
# http://www.alistapart.com/articles/slashforward/
|
||||
# http://httpd.apache.org/docs/2.0/misc/rewriteguide.html#url Trailing Slash Problem
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
|
||||
<IfModule mod_rewrite.c>
|
||||
RewriteCond %{REQUEST_FILENAME} !-f
|
||||
RewriteCond %{REQUEST_URI} !(\.[a-zA-Z0-9]{1,5}|/|#(.*))$
|
||||
RewriteRule ^(.*)$ /$1/ [R=301,L]
|
||||
</IfModule>
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
|
||||
# Option 2:
|
||||
# Rewrite "domain.com/foo/ -> domain.com/foo"
|
||||
|
||||
#<IfModule mod_rewrite.c>
|
||||
# RewriteRule ^(.*)/$ /$1 [R=301,L]
|
||||
#</IfModule>
|
||||
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# Prevent 404 errors for non-existing redirected folders
|
||||
# ----------------------------------------------------------------------
|
||||
|
||||
# without -MultiViews, Apache will give a 404 for a rewrite if a folder of the same name does not exist
|
||||
# e.g. /blog/hello : webmasterworld.com/apache/3808792.htm
|
||||
|
||||
Options -MultiViews
|
||||
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# custom 404 page
|
||||
# ----------------------------------------------------------------------
|
||||
|
||||
# You can add custom pages to handle 500 or 403 pretty easily, if you like.
|
||||
ErrorDocument 404 /404.html
|
||||
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# UTF-8 encoding
|
||||
# ----------------------------------------------------------------------
|
||||
|
||||
# use utf-8 encoding for anything served text/plain or text/html
|
||||
AddDefaultCharset utf-8
|
||||
|
||||
# force utf-8 for a number of file formats
|
||||
AddCharset utf-8 .html .css .js .xml .json .rss
|
||||
|
||||
|
||||
|
||||
# ----------------------------------------------------------------------
|
||||
# A little more security
|
||||
# ----------------------------------------------------------------------
|
||||
|
||||
|
||||
# Do we want to advertise the exact version number of Apache we're running?
# Probably not.
## ServerTokens is only valid in the main server configuration (httpd.conf).
## Apache rejects it inside .htaccess with a 500 Internal Server Error, so it
## must stay commented out here; copy it to httpd.conf to enable it.
# ServerTokens Prod
|
||||
|
||||
|
||||
# "-Indexes" will have Apache block users from browsing folders without a default document
|
||||
# Usually you should leave this activated, because you shouldn't allow everybody to surf through
|
||||
# every folder on your server (which includes rather private places like CMS system folders).
|
||||
Options -Indexes
|
||||
|
||||
|
||||
# Block access to "hidden" directories whose names begin with a period. This
|
||||
# includes directories used by version control systems such as Subversion or Git.
|
||||
<IfModule mod_rewrite.c>
|
||||
RewriteRule "(^|/)\." - [F]
|
||||
</IfModule>
|
||||
|
||||
<% if @site.config[:redirects] %>
|
||||
# Set up URL redirects<% @site.config[:redirects].each do |h| %>
|
||||
Redirect 301 <%= h[:from] %> <%= h[:to] %>
|
||||
<% end %><% end %>
|
|
@ -15,6 +15,7 @@ unless defined? LOADED_DEFAULT_CONFIG
|
|||
include Nanoc3::Helpers::HTMLEscape
|
||||
include Nanoc3::Helpers::Rendering
|
||||
include Nanoc3::Helpers::LinkTo
|
||||
include Nanoc3::Helpers::XMLSitemap
|
||||
|
||||
# cachebuster
|
||||
require 'nanoc/cachebuster'
|
||||
|
|
92
lib/preprocessors.rb
Normal file
92
lib/preprocessors.rb
Normal file
|
@ -0,0 +1,92 @@
|
|||
# Preprocessor helpers
|
||||
#
|
||||
# This file has a collection of methods that are meant to be used in the
|
||||
# preprocess-block in the Nanoc Rules file.
|
||||
#
|
||||
# @author Arjan van der Gaag
|
||||
|
||||
|
||||
# Generate a sitemap.xml file using Nanoc's own xml_sitemap helper method by
# dynamically adding a new '/sitemap/' item.
#
# Before the sitemap item is added, items that should not appear in the
# sitemap are marked hidden: every item whose extension is an image or typical
# asset type, and every item whose identifier ends in a `404`, `500` or
# `htaccess` path segment. The is_hidden attribute is only set when the item
# has no is_hidden key at all, allowing per-file overriding.
#
# Does nothing unless the `output_generated_assets` site setting is truthy.
#
# @todo extract hidden file types into configuration file?
def create_sitemap
  return unless @site.config[:output_generated_assets]

  hidden_extensions = %w{png gif jpg jpeg coffee scss sass less css xml js txt ico}

  # Match a whole trailing path segment only. An unanchored /404|500|htaccess/
  # would also hide regular pages that merely contain those characters, such
  # as a post with the slug "fixing-500-errors".
  hidden_identifiers = %r{/(404|500|htaccess)/\z}

  @items.each do |item|
    # An explicit is_hidden value (true or false) on the item always wins.
    next if item.attributes.has_key?(:is_hidden)

    if hidden_extensions.include?(item[:extension]) || item.identifier =~ hidden_identifiers
      item[:is_hidden] = true
    end
  end

  # The item content is an ERB template; the Rules file is expected to run it
  # through the erb filter so xml_sitemap is evaluated at compile time.
  @items << Nanoc3::Item.new(
    "<%= xml_sitemap %>",
    { :extension => 'xml', :is_hidden => true },
    '/sitemap/'
  )
end
|
||||
|
||||
# Use special settings from the site configuration to generate the files
# necessary for various webmaster tools authentications, such as the services
# from Google, Yahoo and Bing.
#
# This loops through all the entries in the `webmaster_tools` setting. Each
# entry with an identifier becomes a new hidden item: its `content` is the item
# content, its `identifier` is the item identifier, and every remaining key
# (e.g. `extension`) is passed on as an item attribute. Entries without an
# identifier are skipped, and a missing `webmaster_tools` setting is treated
# as an empty list.
#
# Does nothing unless the `output_generated_assets` site setting is truthy.
#
# See config.yaml for more documentation on the input format.
def create_webmaster_tools_authentications
  return unless @site.config[:output_generated_assets]

  (@site.config[:webmaster_tools] || []).each do |file|
    next if file[:identifier].nil?

    # Build the attributes from a copy so the site configuration itself is
    # left untouched, and actually keep the is_hidden flag (a bare
    # Hash#merge returns a new hash and does not modify its receiver).
    attributes = file.reject { |key, _| key == :content || key == :identifier }
    attributes[:is_hidden] = true

    @items << Nanoc3::Item.new(file[:content], attributes, file[:identifier])
  end
end
|
||||
|
||||
# Generate a robots.txt file in the root of the site by dynamically creating
# a new hidden '/robots/' item.
#
# When the `robots` setting has a truthy `default`, this outputs a default
# robots.txt that disallows /assets, allows /assets/images, and points to
# sitemap.xml under the configured `base_url`.
#
# Otherwise the contents are built from the site configuration: one Disallow
# line per entry in `disallow`, one Allow line per entry in `allow`, and a
# Sitemap line when `sitemap` is set. Each of these keys is optional. See the
# config.yaml file for more information on the expected input format.
#
# Does nothing unless the `output_generated_assets` site setting is truthy and
# a `robots` setting is present.
def create_robots_txt
  return unless @site.config[:output_generated_assets]

  robots = @site.config[:robots]
  return unless robots

  content = if robots[:default]
    # Heredoc body is kept at column 0 on purpose: <<- preserves any leading
    # whitespace of the content lines in the generated file.
    <<-EOS
User-agent: *
Disallow: /assets
Allow: /assets/images
Sitemap: #{@site.config[:base_url]}/sitemap.xml
EOS
  else
    lines = ['User-Agent: *']
    lines.concat((robots[:disallow] || []).map { |path| "Disallow: #{path}" })
    lines.concat((robots[:allow] || []).map { |path| "Allow: #{path}" })
    # Skip the Sitemap line entirely rather than emitting an empty value.
    lines << "Sitemap: #{robots[:sitemap]}" if robots[:sitemap]
    lines.join("\n")
  end

  @items << Nanoc3::Item.new(
    content,
    { :extension => 'txt', :is_hidden => true },
    '/robots/'
  )
end
|
31
lib/shellcmd_filter.rb
Normal file
31
lib/shellcmd_filter.rb
Normal file
|
@ -0,0 +1,31 @@
|
|||
require 'open3'
|
||||
|
||||
# This nanoc filter is a general purpose filter that simply pipes
# the contents of an item into a given shell command, and sets
# the item's output to the standard output of that command.
#
# The command is run through Open3.capture2, which reads the command's
# output while the input is still being written, so large inputs do not
# block on a full pipe. The command's standard error is not captured; it
# is inherited from the nanoc process.
#
# Usage:
#
#   compile '/static/js/*/' do
#     # minify JS :)
#     filter :shellcmd, :cmd => "java -jar js-compiler.jar"
#   end
#
# Written by Vincent Driessen (http://twitter.com/nvie) and
# released to the public domain.
#
# http://nvie.com
class ShellCmdFilter < Nanoc3::Filter
  identifier :shellcmd

  # Pipe the item content through a shell command.
  #
  # @param content [String] the item content, written to the command's stdin
  # @param params [Hash] filter options; `:cmd` is the shell command to run
  # @return [String] everything the command wrote to its stdout
  def run(content, params={ :cmd => "sed s/foo/bar/" })
    # The exit status is deliberately ignored, as before: a failing command
    # simply yields whatever it printed to stdout.
    output, _status = Open3.capture2(params[:cmd], :stdin_data => content)
    output
  end
end
|
37
nanoc.yaml
37
nanoc.yaml
|
@ -24,6 +24,43 @@ title: 'ariejan.net'
|
|||
author_name: 'Ariejan de Vroom'
|
||||
author_uri: 'http://ariejan.net'
|
||||
|
||||
|
||||
# Configure the robots.txt file for this site.
|
||||
# Setting 'default' to true-ish will use sensible defaults. If you
|
||||
# wish to customize it, you can list paths to allow and to disallow.
|
||||
# Finally, you could manually set the path to the sitemap file.
|
||||
#
|
||||
# You can customize the robots file fairly well like this, but you
|
||||
# can always manually create a content file with the exact contents
|
||||
# you need.
|
||||
robots:
|
||||
default: true # disallow assets, allow assets/images and point at sitemap
|
||||
# disallow:
|
||||
# - '/tag'
|
||||
# - '/newsletter'
|
||||
# allow:
|
||||
# - '/tag/foo'
|
||||
# sitemap: '/site-map.txt'
|
||||
|
||||
# Set up authentication files for various webmaster tools (or something
|
||||
# similar). This simply creates a plain text file when generating the site.
|
||||
#
|
||||
# identifier: identifier of the output file, e.g. '/google12345/'
|
||||
# content: content of the file, e.g. 'aoa8202ns001'
|
||||
# extension: extension of the output file, e.g. 'html' or 'xml'
|
||||
webmaster_tools:
|
||||
-
|
||||
identifier:
|
||||
content:
|
||||
extension:
|
||||
|
||||
# Should Nanoc generate some standard asset files for you, or skip them
|
||||
# completely?
|
||||
#
|
||||
# This currently controls the generation of robots.txt, sitemap.xml,
|
||||
# sitemap.xml.gz and webmaster tools authentication files.
|
||||
output_generated_assets: true
|
||||
|
||||
prune:
|
||||
# Whether to automatically remove files not managed by nanoc from the output
|
||||
# directory. For safety reasons, this is turned off by default.
|
||||
|
|
Loading…
Reference in New Issue
Block a user