mediawiki转markdown(port to golang)
【摘要】 原PHP实现地址: https://github.com/philipashlock/mediawiki-to-markdown/blob/master/convert.php 。翻译到Golang的实现如下: package main; import ( "bytes" "fmt" "github.com/docopt/docopt-go" "github.com/antchfx/xmlquery" ...
原PHP实现地址:
https://github.com/philipashlock/mediawiki-to-markdown/blob/master/convert.php
翻译到Golang的实现如下:
package main
import (
"bytes"
"fmt"
"github.com/docopt/docopt-go"
"github.com/antchfx/xmlquery"
"golang.org/x/text/encoding/simplifiedchinese"
"golang.org/x/text/transform"
"io"
"io/ioutil"
"os"
"os/exec"
"path/filepath"
"regexp"
"strconv"
"strings"
"html"
)
// g_re matches one MediaWiki internal link of the form [[...]] and captures
// the text between the brackets in group 1. The match is lazy so that two
// links on the same line are matched separately.
//
// Go patterns have no /.../ delimiters (unlike PHP's preg functions); a
// leading or trailing slash would be matched literally.
var g_re = regexp.MustCompile(`\[\[(.+?)\]\]`)
// main converts every <page> element of a MediaWiki XML export into its own
// Markdown file by piping the page's wikitext through pandoc.
//
// Command-line flags are parsed with docopt from the usage text below. Errors
// are reported on standard output and abort the run, except for a page that is
// missing its title/text or that pandoc cannot convert, which is skipped.
func main() {
	// docopt only recognises the lines of the "Usage:" section when they are
	// indented, and separates an option from its description by two spaces.
	usage := `Wiki2Md.

Usage:
  wiki2md --filename=<name> [--output=<export>] [--indexes=<true|false>] [--format=<markdown_phpextra|markdown_github>] [--frontmatter=<true|false>]

Options:
  --filename=<filename>        the name of the xml file you exported from MediaWiki.
  --output=<output>            specify an output folder since each wiki page in the XML file will generate its own separate markdown file.
  --indexes=<indexes>          set indexes as true if you want pages with the same name as a directory to be renamed as index.md and placed into their directory.
  --frontmatter=<frontmatter>  specify whether you want frontmatter included. This is automatically set to true when the output format is markdown_github.
  --format=<format>            specify different output formats with format. The default is markdown_github. [default: markdown_github]`

	arguments, err := docopt.Parse(usage, nil, true, "blah 1.0", false)
	if err != nil {
		fmt.Println(err)
		return
	}

	inputFile := stringArg(arguments, "--filename")
	if inputFile == "" {
		fmt.Println("No input file specified. Use --filename=mediawiki.xml")
		return
	}

	// outputPrefix is either empty or ends in "/", so that page paths can be
	// appended to it directly.
	outputPrefix := stringArg(arguments, "--output")
	if outputPrefix != "" {
		if !strings.HasSuffix(outputPrefix, "/") {
			outputPrefix += "/"
		}
		// Every filesystem path goes through normalizePath so that directories
		// are created in the same place the files are later written to.
		outDir := normalizePath(outputPrefix)
		if _, err := os.Stat(outDir); os.IsNotExist(err) {
			fmt.Printf("Creating output directory %s\n", outDir)
			if err := os.MkdirAll(outDir, 0777); err != nil {
				fmt.Println(err)
				return
			}
		}
	}

	format := stringArg(arguments, "--format")
	if format == "" {
		format = "markdown_github"
	}

	// An explicit --frontmatter value wins; without one, front matter is
	// emitted only for the markdown_github format.
	addMeta, explicit := boolArg(arguments, "--frontmatter")
	if !explicit {
		addMeta = format == "markdown_github"
	}

	// Load XML file.
	raw, err := ioutil.ReadFile(inputFile)
	if err != nil {
		fmt.Println(err)
		return
	}
	// Rename the first default-namespace declaration (the one on the root
	// element) so the un-prefixed XPath queries below are not affected by it.
	// Only the first occurrence is touched so page content stays intact.
	xml := strings.Replace(string(raw), "xmlns=", "ns=", 1)
	doc, err := xmlquery.Parse(strings.NewReader(xml))
	if err != nil {
		fmt.Println(err)
		return
	}

	count := 0
	directoryList := map[string]bool{}

	// Iterate through XML.
	for _, page := range xmlquery.Find(doc, "//page") {
		titleNode := xmlquery.FindOne(page, "//title")
		textNode := xmlquery.FindOne(page, "//revision//text")
		if titleNode == nil || textNode == nil {
			fmt.Println("Skipping page without title or text")
			continue
		}

		title := titleNode.InnerText()
		url := strings.ReplaceAll(title, " ", "_")
		url = strings.ReplaceAll(url, "\"", "")

		// A title such as "Dir/Page" is written to Dir/Page.md.
		directory, filename := "", url
		if slash := strings.Index(url, "/"); slash != -1 {
			title = strings.ReplaceAll(title, "/", " ")
			directory = url[:slash]
			filename = url[slash+1:]
			if directory != "" {
				directoryList[directory] = true
			}
		}

		wikitext := html.UnescapeString(textNode.InnerText()) // decode inline html
		// Adds a leading slash to links ("absolute-path reference"). g_re is
		// compiled once at package scope and must not be recompiled per page.
		wikitext = g_re.ReplaceAllStringFunc(wikitext, new_link)

		markdown, err := runPandoc(wikitext, format)
		if err != nil {
			fmt.Printf("Skipping %q: %v\n", title, err)
			continue
		}
		markdown = strings.ReplaceAll(markdown, "\\_", "_")

		if addMeta {
			// Prepend the page title front matter.
			markdown = "---\n" +
				"title: " + title + "\n" +
				"permalink: /" + url + "/\n" +
				"---\n\n" + markdown
		}

		target := outputPrefix
		if directory != "" {
			target += directory + "/"
		}
		target = normalizePath(target + filename + ".md")

		// filename may itself contain slashes (titles like "A/B/C"), so create
		// the whole parent chain of the final path.
		if err := os.MkdirAll(filepath.Dir(target), 0777); err != nil {
			fmt.Println(err)
			return
		}
		if err := ioutil.WriteFile(target, []byte(markdown), 0644); err != nil {
			fmt.Println(err)
			return
		}
		count++
	}

	// Rename and move files with the same name as directories.
	if wantIndexes, _ := boolArg(arguments, "--indexes"); wantIndexes {
		for directoryName := range directoryList {
			src := normalizePath(outputPrefix + directoryName + ".md")
			if _, err := os.Stat(src); err != nil {
				continue // no page shares its name with this directory
			}
			dst := normalizePath(outputPrefix + directoryName + "/index.md")
			if err := os.Rename(src, dst); err != nil {
				fmt.Println(err)
			}
		}
	}

	if count > 0 {
		fmt.Println(strconv.Itoa(count) + " files converted")
	}
}

// stringArg returns the string value stored under key in the parsed docopt
// arguments, or "" when the option is absent or not a string.
func stringArg(args map[string]interface{}, key string) string {
	s, _ := args[key].(string)
	return s
}

// boolArg interprets the option stored under key as a boolean. set reports
// whether the option was given a non-empty value. Values strconv.ParseBool
// understands are honoured; any other non-empty value counts as true.
func boolArg(args map[string]interface{}, key string) (value, set bool) {
	s := stringArg(args, key)
	if s == "" {
		return false, false
	}
	if parsed, err := strconv.ParseBool(s); err == nil {
		return parsed, true
	}
	return true, true
}

// runPandoc feeds wikitext to "pandoc -f mediawiki -t <format>" on standard
// input and returns what pandoc writes to standard output. Anything pandoc
// writes to standard error is forwarded to this process's standard error on
// success and folded into the returned error on failure, so it never ends up
// in the converted document.
func runPandoc(wikitext, format string) (string, error) {
	var stdout, stderr bytes.Buffer

	// Arguments are passed as a slice, not through a shell, so a format value
	// containing spaces or shell metacharacters cannot alter the command.
	cmd := exec.Command("pandoc", "-f", "mediawiki", "-t", format)
	cmd.Stdin = strings.NewReader(wikitext)
	cmd.Stdout = &stdout
	cmd.Stderr = &stderr

	if err := cmd.Run(); err != nil {
		return "", fmt.Errorf("running pandoc: %w: %s", err, strings.TrimSpace(stderr.String()))
	}
	if stderr.Len() > 0 {
		// Best-effort relay of pandoc warnings; a failure to print them must
		// not fail the conversion.
		_, _ = io.WriteString(os.Stderr, stderr.String())
	}
	return stdout.String(), nil
}
func GbkToUtf8(s []byte) ([]byte, error) {
reader := transform.NewReader(bytes.NewReader(s), simplifiedchinese.GBK.NewDecoder())
d, e := ioutil.ReadAll(reader)
if e != nil {
return nil, e
}
return d, nil
}
// new_link rewrites one matched MediaWiki internal link so that its target is
// an absolute path ("absolute-path reference").
//
// m is the full text of one match, containing "[[" ... "]]". The result is
// always of the form "[[/Target|Label]]":
//   - "[[Some Page]]"         becomes "[[/Some_Page|Some Page]]"
//   - "[[Some Page | Label]]" becomes "[[/Some_Page|Label]]"
//
// Spaces in the target become underscores. Input without "[[" and "]]" is
// returned unchanged.
func new_link(m string) string {
	start := strings.Index(m, "[[")
	end := strings.LastIndex(m, "]]")
	if start == -1 || end < start+2 {
		return m
	}
	inner := m[start+2 : end]

	pipe := strings.Index(inner, "|")
	if pipe == -1 {
		// No explicit label: the page name doubles as the link text.
		return "[[/" + strings.ReplaceAll(inner, " ", "_") + "|" + inner + "]]"
	}

	link := "/" + strings.ReplaceAll(strings.TrimSpace(inner[:pipe]), " ", "_")
	linkText := strings.TrimSpace(inner[pipe+1:])
	return "[[" + link + "|" + linkText + "]]"
}
// Explode splits text around every occurrence of delimiter and returns the
// pieces, mirroring the argument order of PHP's explode(delimiter, string).
// A text that does not contain delimiter is returned as a single-element
// slice, including when delimiter is longer than text.
func Explode(delimiter, text string) []string {
	return strings.Split(text, delimiter)
}
// Implode concatenates pieces into one string, placing glue between
// neighbouring elements. The argument order mirrors PHP's implode.
func Implode(glue string, pieces []string) string {
	joined := strings.Join(pieces, glue)
	return joined
}
// stack is a LIFO collection of strings backed by a slice. Its methods have
// value receivers and return the updated stack instead of modifying the
// receiver, so callers must keep the returned value.
type stack []string

// Push returns the stack with v added on top.
func (s stack) Push(v string) stack {
	grown := append(s, v)
	return grown
}

// Pop returns the stack without its top element, together with that element.
// Popping an empty stack returns the stack unchanged and the empty string.
func (s stack) Pop() (stack, string) {
	if last := len(s) - 1; last >= 0 {
		return s[:last], s[last]
	}
	return s, ""
}
// GetCurrentDirectory returns the absolute path of the directory part of
// os.Args[0], with every backslash replaced by a forward slash. If the
// absolute path cannot be determined the error is printed and whatever
// filepath.Abs returned is still converted and returned.
func GetCurrentDirectory() string {
	// filepath.Dir drops the last path element of os.Args[0].
	programDir := filepath.Dir(os.Args[0])
	absolute, err := filepath.Abs(programDir)
	if err != nil {
		fmt.Println(err.Error())
	}
	// Replace "\" with "/".
	return strings.Replace(absolute, "\\", "/", -1)
}
// normalizePath resolves a path that begins with "." against the directory
// returned by GetCurrentDirectory; any other path is returned unchanged.
//
// Borrowed from http://php.net/manual/en/func.realpath.php
func normalizePath(path string) string {
	if strings.HasPrefix(path, ".") {
		return filepath.Join(GetCurrentDirectory(), path)
	}
	return path
}
【版权声明】本文为华为云社区用户转载文章,如果您发现本社区中有涉嫌抄袭的内容,欢迎发送邮件进行举报,并提供相关证据,一经查实,本社区将立刻删除涉嫌侵权内容,举报邮箱:
cloudbbs@huaweicloud.com
- 点赞
- 收藏
- 关注作者
评论(0)