blog3000/Blog3000/Server/MiddleWares/SEOMgr.cs

360 lines
12 KiB
C#

using Microsoft.AspNetCore.Http;
using Microsoft.Extensions.Primitives;
using Microsoft.Extensions.DependencyInjection;
using System;
using System.Net;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using Ganss.XSS;
using System.IO;
using Markdig;
using Markdig.Syntax;
using Blog3000.Shared;
using Microsoft.Extensions.Logging;
using AngleSharp;
using System.Net.Http;
using Microsoft.CodeAnalysis.CSharp.Syntax;
using Blog3000.Client;
namespace Blog3000.Server.MiddleWares
{
public class SEOMgr: IDisposable
{
// Diagnostics sink for sitemap rebuilds and crawler notifications.
private readonly ILogger logger;
// Blog environment; its BlogConfig is read for the URL prefix, author names and bot settings.
private readonly BlogEnv blogEnv;
// Source of blog post headers, post texts and file modification dates.
private readonly BlogPostRepo blogPostRepo;
// Value of BlogConfig.VisibleUrlPrefix; prepended to every URL written to the sitemap.
private readonly string urlPrefix;
// Guards sitemap rebuilds. Static, so the lock is shared by all SEOMgr instances.
private static readonly object siteMapLock = new object();
// Full path of the file the generated sitemap is persisted to.
private readonly string sitemapCacheFile;
// Most recently generated sitemap XML; null as long as no rebuild has stored one.
private string siteMapCache;
//
// Lower bound for the <lastmod> dates written to the sitemap.
// Update when SEO-Output changes
//
private static readonly DateTime MINSEODATE = new DateTime(2020, 07, 03);
/// <summary>
/// Creates the SEO manager, subscribes to blog post changes and runs the
/// change handler once so that a sitemap is available right away.
/// </summary>
/// <param name="logger">Logger used for sitemap and crawler diagnostics.</param>
/// <param name="blogEnv">Blog environment providing the <c>BlogConfig</c>.</param>
/// <param name="blogPostRepo">Repository whose <c>BlogPostsChanged</c> event triggers sitemap rebuilds.</param>
/// <exception cref="ArgumentNullException">One of the dependencies is null.</exception>
public SEOMgr(ILogger<SEOMgr> logger, BlogEnv blogEnv, BlogPostRepo blogPostRepo)
{
    // Fail with a descriptive exception instead of a NullReferenceException further down.
    if (logger == null)
    {
        throw new ArgumentNullException(nameof(logger));
    }
    if (blogEnv == null)
    {
        throw new ArgumentNullException(nameof(blogEnv));
    }
    if (blogPostRepo == null)
    {
        throw new ArgumentNullException(nameof(blogPostRepo));
    }

    this.logger = logger;
    this.blogEnv = blogEnv;
    this.blogPostRepo = blogPostRepo;
    this.urlPrefix = blogEnv.BlogConfig.VisibleUrlPrefix;

    // The cache file lives in the directory stored under the "AppDataPath" AppDomain data slot.
    // NOTE(review): presumably set by the host at startup; Path.Combine throws if it is missing.
    this.sitemapCacheFile = System.IO.Path.Combine(
        (string)AppDomain.CurrentDomain.GetData("AppDataPath"),
        "sitemapcache.xml");

    blogPostRepo.BlogPostsChanged += BlogPostRepo_BlogPostsChanged;

    // Treat startup like a change notification to build the initial sitemap.
    BlogPostRepo_BlogPostsChanged(null, null);
}
/// <summary>
/// Detaches this instance from the repository's <c>BlogPostsChanged</c> event.
/// </summary>
public void Dispose() => blogPostRepo.BlogPostsChanged -= BlogPostRepo_BlogPostsChanged;
/// <summary>
/// Returns the cached sitemap XML, triggering a rebuild first when nothing
/// has been cached yet.
/// </summary>
/// <returns>The content of the sitemap cache after the optional rebuild.</returns>
public string GetSiteMap()
{
    if (siteMapCache != null)
    {
        return siteMapCache;
    }

    UpdateSiteMap();
    return siteMapCache;
}
/// <summary>
/// Builds a standalone HTML document for a blog post: head metadata (title,
/// description, keywords, optional author) followed by the post's markdown
/// rendered to HTML and passed through <c>InputSanitizer</c>.
/// </summary>
/// <param name="bp">Header of the blog post to render.</param>
/// <returns>A <see cref="StringBuilder"/> holding the complete HTML document.</returns>
/// <exception cref="ArgumentNullException"><paramref name="bp"/> is null.</exception>
public StringBuilder BuildBlogPostOutput(BlogPostHeader bp)
{
    if (bp == null)
    {
        throw new ArgumentNullException(nameof(bp));
    }

    string markdown = blogPostRepo.GetText(bp);
    var title = InputSanitizer.Default.SanitizeText(bp.Title, true);

    // Keywords: every topic without quotes and hash signs, each followed by a comma.
    var rawKeywords = new StringBuilder();
    if (bp.Topics != null)
    {
        foreach (var topic in bp.Topics)
        {
            if (topic == null)
            {
                continue;
            }
            rawKeywords.Append(topic.Replace("'", "").Replace("#", "")).Append(','); // TODO
        }
    }
    var keywords = InputSanitizer.Default.SanitizeText(rawKeywords.ToString());

    // "" + ... turns a missing abstract into an empty string.
    // Quotes are removed because the value is placed inside a single-quoted attribute.
    var abstr = "" + bp.Abstract;
    abstr = InputSanitizer.Default.SanitizeText(abstr, true).Replace("'", ""); // TODO

    // Resolve the author's full name; only a successful lookup is sanitized and emitted.
    // NOTE(review): unlike the abstract, quotes are not stripped from the author before it is
    // written into a single-quoted attribute - confirm that SanitizeText makes this safe.
    string author = null;
    if (!String.IsNullOrEmpty(bp.Author))
    {
        string fullName = null;
        var fullNames = blogEnv.BlogConfig?.AuthorFullnames;
        if (fullNames != null && fullNames.TryGetValue(bp.Author, out fullName) && fullName != null)
        {
            author = InputSanitizer.Default.SanitizeText(fullName, true);
        }
    }

    var html = RenderSanitizedPostHtml(bp, markdown);

    var lang = "en";
    if (!String.IsNullOrEmpty(bp.Lang))
    {
        lang = InputSanitizer.Default.SanitizeText(bp.Lang);
    }

    // Build final document
    // NOTE(review): the document starts with an empty line; a doctype may have been intended - confirm.
    StringBuilder sb = new StringBuilder();
    sb.AppendLine("");
    sb.AppendLine($"<html lang='{lang}'>");
    sb.AppendLine("<head>");
    sb.AppendLine($"<title>{title}</title>");
    sb.AppendLine("<meta charset='UTF-8'/>");
    sb.AppendLine("<meta name='viewport' content='width=device-width, initial-scale=1.0'>");
    sb.AppendLine($"<meta name='generator' content='blog3000 build:{BuildVersion.BUILD_DATE}'>");
    sb.AppendLine($"<meta name='description' content='{abstr}'></meta>");
    sb.AppendLine($"<meta name='keywords' content='{keywords}'></meta>");
    if (!String.IsNullOrEmpty(author))
    {
        sb.AppendLine($"<meta name='author' content='{author}'></meta>");
    }
    sb.AppendLine("</head>");
    sb.AppendLine("<body>");
    sb.AppendLine($"<h1>{title}</h1>");
    sb.AppendLine($"<hr/>");
    sb.AppendLine($"<div><i>Abstract: {abstr}</i></div>");
    if (!String.IsNullOrEmpty(author))
    {
        sb.AppendLine($"<div><i>Author: {author}</i></div>");
    }
    sb.AppendLine("<hr/>");
    sb.AppendLine(html);
    sb.AppendLine("</body></html>");
    return sb;
}

// Renders the post's markdown to HTML, rewriting reference-defined URLs to the post's
// imgref route, and returns the result after InputSanitizer.SanitizeHtml.
private string RenderSanitizedPostHtml(BlogPostHeader bp, string markdown)
{
    var mdPipeline = new MarkdownPipelineBuilder().UseAdvancedExtensions().Build();
    var mdoc = Markdig.Markdown.Parse(markdown, mdPipeline);

    // Collect the URLs of all link reference definitions for rewriting.
    // A set is used because several definitions may legitimately share one URL.
    // NOTE(review): every reference definition is collected, not only those used by images.
    var referencedUrls = new HashSet<string>();
    foreach (var r in mdoc.GetLinkReferenceDefinitions().Links)
    {
        if (r.Value != null && r.Value.Url != null)
        {
            referencedUrls.Add(r.Value.Url);
        }
    }

    // Render markdown, rewrite image-urls
    using (var writer = new StringWriter())
    {
        var renderer = new Markdig.Renderers.HtmlRenderer(writer);
        renderer.LinkRewriter = url =>
            referencedUrls.Contains(url) ? $"BlogPosts/{bp.Id}/imgref/{url}" : url;
        mdPipeline.Setup(renderer);
        renderer.Render(mdoc);

        // Create sanitized html
        return InputSanitizer.Default.SanitizeHtml(writer.ToString());
    }
}
/// <summary>
/// Generates the sitemap XML: one <c>url</c> entry per blog post whose
/// <c>Access</c> equals "PUBLIC" (ordered by id), followed by an entry for the index page.
/// </summary>
/// <remarks>
/// Post modification dates earlier than <c>MINSEODATE</c> are raised to
/// <c>MINSEODATE</c>. The index page's <c>lastmod</c> is the newest post date.
/// </remarks>
/// <returns>The complete sitemap document as a string.</returns>
private string CreateSitemap()
{
    var sb = new StringBuilder();
    sb.AppendLine("<?xml version=\"1.0\" encoding=\"UTF-8\"?>");
    sb.AppendLine("<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">");

    DateTime? latestTime = null;

    // Articles
    foreach (var p in blogPostRepo.GetHeaders().Where(x => "PUBLIC".Equals(x.Access)).OrderBy(x => x.Id))
    {
        // Entity-escape the URL so characters such as '&' cannot break the XML document.
        var postLoc = System.Security.SecurityElement.Escape($"{urlPrefix}/viewer/{p.Id}");

        sb.AppendLine(" <url>");
        sb.AppendLine($" <loc>{postLoc}</loc>");

        var dt = blogPostRepo.GetFileModDate(p);
        if (dt != null)
        {
            // Never report a date older than the last change of the SEO output itself.
            if (MINSEODATE.CompareTo(dt) > 0)
            {
                dt = MINSEODATE;
            }
            sb.AppendLine($" <lastmod>{((DateTime)dt).ToString("o")}</lastmod>");

            // Track the newest post date for the index page entry.
            if (latestTime == null)
            {
                latestTime = dt;
            }
            else if (dt.Value.CompareTo(latestTime) > 0)
            {
                latestTime = dt;
            }
        }

        sb.AppendLine(" <changefreq>weekly</changefreq>");
        sb.AppendLine(" </url>");
    }

    // Index page
    // NOTE(review): the sitemap schema appears to expect lastmod before changefreq/priority;
    // the original element order is kept so the generated output does not change - confirm.
    var indexLoc = System.Security.SecurityElement.Escape($"{urlPrefix}/");
    sb.AppendLine(" <url>");
    sb.AppendLine($" <loc>{indexLoc}</loc>");
    sb.AppendLine(" <changefreq>weekly</changefreq>");
    sb.AppendLine(" <priority>0.1</priority>");
    if (latestTime != null)
    {
        sb.AppendLine($" <lastmod>{((DateTime)latestTime).ToString("o")}</lastmod>");
    }
    sb.AppendLine(" </url>");

    // Done
    sb.AppendLine("</urlset>");
    return sb.ToString();
}
// True while a rebuild started by UpdateSiteMap is in progress.
private bool isUpdatingSitemap = false;

/// <summary>
/// Rebuilds the sitemap, stores it in the in-memory cache and the cache file,
/// and notifies the crawlers in the background when the content differs from
/// the previously known sitemap.
/// </summary>
private void UpdateSiteMap()
{
    lock (siteMapLock)
    {
        // Check if this thread is already rebuilding the sitemap and skip.
        // (Most importantly, this avoids notifying the crawlers twice.)
        // A C# lock can be re-entered by the thread that holds it.
        // This may occur here when the BlogPostRepo raises its changed event,
        // which is triggered by accessing the BlogPostRepo during the
        // sitemap creation.
        if (isUpdatingSitemap)
        {
            logger.LogInformation("Already rebuilding Sitemap. Skip");
            return;
        }

        isUpdatingSitemap = true;
        try
        {
            // Compare against the in-memory sitemap, or the persisted one when nothing is cached.
            var oldSm = siteMapCache;
            if (String.IsNullOrEmpty(oldSm))
            {
                oldSm = LoadSitemapCache();
            }

            logger.LogInformation("Rebuilding Sitemap");
            var sm = CreateSitemap();
            siteMapCache = sm;
            SaveSitemapCache(sm);

            if (String.Equals(sm, oldSm))
            {
                logger.LogInformation("Sitemap not changed");
                return;
            }

            logger.LogInformation("Sitemap changed");

            // Fire-and-forget: the rebuild must not wait for the crawlers.
            // Failures are logged here because nobody observes the task.
            Task.Run(async () =>
            {
                try
                {
                    await PingCrawlersAsync();
                }
                catch (Exception ex)
                {
                    logger.LogWarning(ex, "Notifying crawlers failed");
                }
            });
        }
        finally
        {
            isUpdatingSitemap = false;
        }
    }
}
// Shared client for all crawler pings; HttpClient is meant to be reused rather than
// created per request, and its request methods may be used concurrently.
private static readonly HttpClient crawlerPingClient = new HttpClient();

/// <summary>
/// Tells Google and Bing that the sitemap changed by requesting their ping
/// URLs concurrently. Does nothing unless <c>AllowSearchBots</c> or
/// <c>AllowSearchDevBot</c> is set in the blog configuration.
/// </summary>
/// <returns>
/// <c>true</c> when the notifications were attempted (individual failures are
/// logged, not thrown); <c>false</c> when notifications are disabled.
/// </returns>
private async Task<bool> PingCrawlersAsync()
{
    if (!(blogEnv.BlogConfig.AllowSearchBots || blogEnv.BlogConfig.AllowSearchDevBot))
    {
        return false;
    }

    var google = PingCrawlerAsync("Google", "http://www.google.com/ping?sitemap=");
    var bing = PingCrawlerAsync("Bing", "http://www.bing.com/ping?sitemap=");
    await Task.WhenAll(google, bing);
    return true;
}

// Sends one sitemap ping to a single crawler and logs its answer; errors are logged as warnings.
private async Task PingCrawlerAsync(string crawlerName, string pingUrlPrefix)
{
    try
    {
        // NOTE(review): Uri.EscapeUriString is kept so the requested URL stays unchanged; it is
        // marked obsolete in newer .NET versions - consider Uri.EscapeDataString on the
        // sitemap URL only, after verifying it against the ping endpoints.
        var u = Uri.EscapeUriString(pingUrlPrefix + urlPrefix + "/sitemap.xml");
        logger.LogInformation($"Notifying crawlers: {crawlerName} ({u})");

        // Await the request (no blocking on .Result) and dispose the response itself.
        using (var response = await crawlerPingClient.GetAsync(u))
        {
            string code = response.StatusCode.ToString();
            string apiResponse = await response.Content.ReadAsStringAsync();
            logger.LogInformation($"{crawlerName} responded: {code} / {apiResponse}");
        }
    }
    catch (Exception ex)
    {
        // Best effort: a failed notification must not affect the other crawler or the caller.
        logger.LogWarning(ex, $"Notifying crawlers: {crawlerName} failed");
    }
}
/// <summary>
/// Reads the persisted sitemap from the cache file.
/// </summary>
/// <returns>
/// The file content, or <c>null</c> when the file does not exist or cannot be
/// read (read errors are logged as warnings).
/// </returns>
private string LoadSitemapCache()
{
    try
    {
        if (System.IO.File.Exists(sitemapCacheFile))
        {
            return System.IO.File.ReadAllText(sitemapCacheFile);
        }
    }
    catch (Exception ex)
    {
        // The former message used "${...}", which printed a stray '$' in front of the path.
        logger.LogWarning(ex, "Error reading sitemapCacheFile {SitemapCacheFile}", sitemapCacheFile);
    }
    return null;
}
/// <summary>
/// Persists the given sitemap to the cache file, replacing any previous content.
/// </summary>
/// <param name="sitemap">Sitemap XML to write.</param>
/// <returns>
/// <c>true</c> when the file was written; <c>false</c> when writing failed
/// (the error is logged as a warning).
/// </returns>
private bool SaveSitemapCache(string sitemap)
{
    try
    {
        System.IO.File.WriteAllText(sitemapCacheFile, sitemap);
        return true;
    }
    catch (Exception ex)
    {
        // The former message used "${...}", which printed a stray '$' in front of the path.
        logger.LogWarning(ex, "Error writing sitemapCacheFile file {SitemapCacheFile}", sitemapCacheFile);
    }
    return false;
}
/// <summary>
/// Handler for the repository's <c>BlogPostsChanged</c> event: rebuilds the sitemap.
/// Both arguments are ignored.
/// </summary>
private void BlogPostRepo_BlogPostsChanged(object sender, EventArgs e) => UpdateSiteMap();
}
}