// Command main runs a small HTTP proxy on :8090. Every request path is
// fetched from https://medium.com; HTML responses are reduced to the
// contents of their div.section-content elements and re-emitted using a
// small whitelist of tags, while any other content type is streamed through
// unchanged.
package main

import (
	"fmt"
	"io"
	"log"
	"net/http"
	"sort"
	"strings"
	"time"

	"github.com/andybalholm/cascadia"
	"golang.org/x/net/html"
)

// permittedtags lists the element names that clean passes through (with all
// attributes dropped). The slice is sorted in its initializer because
// permitted binary-searches it; it must not depend on main having run.
var permittedtags = func() []string {
	tags := []string{
		"div", "h1", "h2", "table", "tr", "td", "p", "span", "pre",
		"blockquote", "strong", "em", "b", "i", "ol", "ul", "li",
	}
	sort.Strings(tags)
	return tags
}()

// sectionSel matches the article body containers that rerender extracts.
// Compiled once at package scope instead of on every request.
var sectionSel = cascadia.MustCompile("div.section-content")

// upstream is the shared client for fetching remote pages. The timeout keeps
// a stalled upstream from pinning a handler goroutine forever.
var upstream = &http.Client{Timeout: 30 * time.Second}

// prolog is written before the cleaned content of every re-rendered page.
// NOTE(review): it is a single space here; presumably an HTML preamble
// (doctype/styles) was intended - confirm.
var prolog = []byte(` `)

// permitted reports whether tag is in the permittedtags whitelist.
func permitted(tag string) bool {
	idx := sort.SearchStrings(permittedtags, tag)
	return idx < len(permittedtags) && permittedtags[idx] == tag
}

// getattr returns the value of the first attribute of node whose key equals
// attr, or "" when node has no such attribute.
func getattr(node *html.Node, attr string) string {
	for _, a := range node.Attr {
		if a.Key == attr {
			return a.Val
		}
	}
	return ""
}

// clean writes a sanitized rendering of node and its descendants to w.
//
// Links keep only their href and images only their src; whitelisted tags are
// emitted without attributes; any other element is dropped but its children
// are still rendered. Text nodes are HTML-escaped. A newline follows every
// closed <p> and <div>.
//
// Write errors are deliberately ignored: output is streamed best-effort to
// the client and there is nothing useful to do if it has gone away.
//
// NOTE(review): href/src values are escaped but their URL scheme is not
// checked.
func clean(w io.Writer, node *html.Node) {
	switch node.Type {
	case html.ElementNode:
		switch tag := node.Data; tag {
		case "a":
			fmt.Fprintf(w, `<a href="%s">`, html.EscapeString(getattr(node, "href")))
		case "img":
			// img is a void element, so no closing tag is written below.
			fmt.Fprintf(w, `<img src="%s">`, html.EscapeString(getattr(node, "src")))
		default:
			if permitted(tag) {
				fmt.Fprintf(w, "<%s>", tag)
			}
		}
	case html.TextNode:
		io.WriteString(w, html.EscapeString(node.Data))
	}

	for c := node.FirstChild; c != nil; c = c.NextSibling {
		clean(w, c)
	}

	if node.Type == html.ElementNode {
		tag := node.Data
		if tag == "a" || permitted(tag) {
			fmt.Fprintf(w, "</%s>", tag)
		}
		if tag == "p" || tag == "div" {
			io.WriteString(w, "\n")
		}
	}
}

// rerender writes prolog to w followed by the cleaned form of every
// div.section-content element found under root.
func rerender(w io.Writer, root *html.Node) {
	w.Write(prolog)
	for _, div := range sectionSel.MatchAll(root) {
		clean(w, div)
	}
}

// refetch handles a request by fetching the same path from medium.com.
//
// HTML responses are parsed and re-rendered through rerender; everything
// else is copied through with its upstream Content-Type. The upstream status
// code is forwarded in both cases. Fetch and parse failures are reported to
// the client as 502 Bad Gateway rather than as an empty 200.
func refetch(w http.ResponseWriter, r *http.Request) {
	remurl := "https://medium.com" + r.URL.Path
	log.Println("fetching", remurl)

	resp, err := upstream.Get(remurl)
	if err != nil {
		log.Println("err", err)
		http.Error(w, "upstream fetch failed", http.StatusBadGateway)
		return
	}
	defer resp.Body.Close()

	ctype := resp.Header.Get("Content-Type")
	if strings.HasPrefix(ctype, "text/html") {
		// Parse fully before writing anything so a failure can still be
		// turned into a proper error response.
		doc, err := html.Parse(resp.Body)
		if err != nil {
			log.Println("err", err)
			http.Error(w, "upstream response could not be parsed", http.StatusBadGateway)
			return
		}
		// html.Parse assumes UTF-8 input, so the re-rendered output is
		// declared as UTF-8 as well.
		w.Header().Set("Content-Type", "text/html; charset=utf-8")
		w.WriteHeader(resp.StatusCode)
		rerender(w, doc)
		return
	}

	if ctype != "" {
		w.Header().Set("Content-Type", ctype)
	}
	// Only successful responses are worth caching; caching an upstream
	// error for days would make it stick.
	if resp.StatusCode == http.StatusOK {
		w.Header().Set("Cache-Control", "max-age=360000")
	}
	w.WriteHeader(resp.StatusCode)
	if _, err := io.Copy(w, resp.Body); err != nil {
		log.Println("err", err)
	}
}

// main registers refetch for every path and serves on :8090, exiting with a
// logged error if the listener cannot be started or stops.
func main() {
	mux := http.NewServeMux()
	mux.HandleFunc("/", refetch)
	log.Fatal(http.ListenAndServe(":8090", mux))
}