mediawiki转markdown(port to golang)
【摘要】 原PHP实现地址:https://github.com/philipashlock/mediawiki-to-markdown/blob/master/convert.php 。翻译到Golang的实现如下:package main import ( "bytes" "fmt" "github.com/docopt/docopt-go" "github.com/antchfx/xmlquery" ...
原PHP实现地址:
https://github.com/philipashlock/mediawiki-to-markdown/blob/master/convert.php
翻译到Golang的实现如下:
package main import ( "bytes" "fmt" "github.com/docopt/docopt-go" "github.com/antchfx/xmlquery" "golang.org/x/text/encoding/simplifiedchinese" "golang.org/x/text/transform" "io" "io/ioutil" "os" "os/exec" "path/filepath" "regexp" "strconv" "strings" "html" ) var g_re = regexp.MustCompile("/\\[\\[(.+?)\\]\\]/") func main() { usage := `Wiki2Md. Usage: wiki2md --filename=<name> [--output=<export>] [--indexes=<true|false>] [--format=<markdown_phpextra|markdown_github>] [--frontmatter=<true|false>] Options: --filename==<filename> the name of the xml file you exported from MediaWiki. --output==<output> specify an output folder since each wiki page in the XML file will generate it"s own separate markdown file. --indexes==<indexes> set indexes as true if you want pages with the same name as a directory to be renamed as index.md and placed into their directory. --frontmatter==<frontmatter> specify whether you want frontmatter included. This is automatically set to true when the output format is markdown_github. --format==<format> specify different output formats with format. The default is markdown_github.[default: markdown_github]` arguments, err := docopt.Parse(usage, nil, true, "blah 1.0", false) if err != nil { fmt.Println(err) return } fmt.Println(arguments["--filename"]) if (arguments["--filename"]==nil || arguments["--filename"].(string) == "") { fmt.Println("No input file specified. 
Use --filename=mediawiki.xml"); return } output_path := ""; if (arguments["--output"]!=nil && arguments["--output"].(string) != "") { output_path = arguments["--output"].(string); if _, err := os.Stat(output_path); os.IsNotExist(err) { fmt.Println("Creating output directory %s", output_path); err = os.MkdirAll(output_path, 0777) if err != nil { fmt.Println(err) return } } } format := "markdown_github"; if (arguments["--format"] !=nil && arguments["--format"].(string) != "") { format = arguments["--format"].(string); } add_meta := false; if ((arguments["--frontmatter"] != nil && arguments["--frontmatter"].(string) != "") || ((arguments["--frontmatter"]==nil || arguments["--frontmatter"].(string) == "") && format == "markdown_github")) { add_meta = true; } // Load XML file buf, err := ioutil.ReadFile(arguments["--filename"].(string)); // just pass the file name if err != nil { fmt.Print(err) } file := string(buf); xml := strings.ReplaceAll( file,"xmlns = ","ns = "); //string is a string that contains xml... 
doc,err := xmlquery.Parse(strings.NewReader(xml)) result := xmlquery.Find(doc, "//page") count := 0; directory_list := map[string]bool{} // Iterate through XML for _,node := range result { title := xmlquery.FindOne(node,"//title"); titleStr := title.InnerText();//title[0]; url := strings.Replace(titleStr," ", "_", -1); url = strings.Replace(url,"\"", "", -1); directory := ""; filename := url; slash := strings.Index(url, "/"); if(slash != -1){ titleStr = strings.Replace(titleStr,"/", " ", -1); directory = url[0:slash]; filename = url[slash+1:]; directory_list[directory] = true; } text := xmlquery.FindOne(node,"//revision//text"); textStr := text.InnerText();//text[0]; textStr = html.UnescapeString(textStr); // decode inline html g_re = regexp.MustCompile("/\\[\\[(.+?)\\]\\]/") textStr = g_re.ReplaceAllStringFunc(textStr,new_link); // adds leading slash to links, "absolute-path reference" // prepare to append page title frontmatter to text frontmatter := ""; if (add_meta) { frontmatter = "---\n"; frontmatter += "title: "+titleStr+"\n"; frontmatter += "permalink: /"+url+"/\n"; frontmatter += "---\n\n"; } //================== //pandoc 01_overview.mediawiki -f mediawiki -t markdown -o Project-Handbook_01_Overview.md var stdout bytes.Buffer var stderr bytes.Buffer Param := "-f mediawiki -t "+format; params := []string{"/C", "pandoc"} // params = append(params, strings.Split(Param, " ")...) cmd := exec.Command("cmd", params...) 
stdin, err := cmd.StdinPipe() if err != nil { fmt.Println(err) //replace with logger, or anything you want } //cmd.Stdout = os.Stdout //cmd.Stderr = os.Stderr cmd.Stdout = &stdout cmd.Stderr = &stderr fmt.Println("START") //for debug if err = cmd.Start(); err != nil { //Use start, not run fmt.Println("An error occured: ", err) //replace with logger, or anything you want } io.WriteString(stdin, textStr) stdin.Close() cmd.Wait() fmt.Println("END") //for debug bt,err := GbkToUtf8([]byte(stderr.String())) bs := stdout.String()+string(bt);// if err != nil { fmt.Println("execbat:err"+err.Error()+"\n"+ bs) } else { fmt.Println( bs) } //==================== textStr = bs; textStr = strings.Replace(textStr, "\\_", "_",-1); if (add_meta) { textStr = frontmatter + textStr; } if len(output_path)>0 && output_path[len(output_path)-1:] != "/" { output_path = output_path + "/"; } directory = output_path + directory; // create directory if necessary if(directory!="") { if _, err := os.Stat(directory); os.IsNotExist(err) { os.Mkdir(directory,0777); } if len(directory)>0 && directory[len(directory)-1:]!="/" { directory = directory + "/"; } } // create file pt := normalizePath(directory + filename + ".md") err = ioutil.WriteFile(pt, []byte(textStr), 0755) if err != nil { fmt.Println(err) return } count++; } // Rename and move files with the same name as directories if (len(directory_list)!=0 && arguments["--indexes"]!=nil && arguments["--indexes"].(string)!="") { directory_list_keys := make([]string, len(directory_list)) i := 0 for k := range directory_list { directory_list_keys[i] = k i++ } for _,directory_name := range directory_list_keys { if _, err := os.Stat(output_path+directory_name+".md"); os.IsNotExist(err) { os.Rename(output_path+directory_name+".md", output_path+directory_name+"/index.md"); } } } if (count > 0) { fmt.Println(strconv.Itoa(count) + " files converted"); } } func GbkToUtf8(s []byte) ([]byte, error) { reader := transform.NewReader(bytes.NewReader(s), 
simplifiedchinese.GBK.NewDecoder()) d, e := ioutil.ReadAll(reader) if e != nil { return nil, e } return d, nil } func new_link(m string) string { matches := g_re.FindStringSubmatch(m) if(strings.Index(matches[1], "|") != -1) { new_link := strings.ReplaceAll(matches[1]," ", "_"); return "[[/"+new_link+"|{"+matches[1]+"}]]"; } else { link := strings.TrimSpace((matches[1])[0:(strings.Index(matches[1], "|"))]); link = "/" + strings.ReplaceAll(link," ", "_"); link_text := strings.TrimSpace((matches[1])[(strings.Index(matches[1], "|")+1):]); return "[["+link+"|"+link_text+"]]"; } } func Explode(delimiter, text string) []string { if len(delimiter) > len(text) { return strings.Split(delimiter, text) } else { return strings.Split(text, delimiter) } } func Implode(glue string, pieces []string) string { return strings.Join(pieces, glue) } type stack []string func (s stack) Push(v string) stack { return append(s, v) } func (s stack) Pop() (stack, string) { // FIXME: What do we do if the stack is empty, though? l := len(s) if l==0 { return s,"" } return s[:l-1], s[l-1] } func GetCurrentDirectory() string { dir, err := filepath.Abs(filepath.Dir(os.Args[0])) //返回绝对路径 filepath.Dir(os.Args[0])去除最后一个元素的路径 if err != nil { fmt.Println(err.Error()) } return strings.Replace(dir, "\\", "/", -1) //将\替换成/ } // Borrowed from http://php.net/manual/en/func.realpath.php func normalizePath(path string) string { if !strings.HasPrefix(path, ".") { return path } return filepath.Join(GetCurrentDirectory(), path) }
【版权声明】本文为华为云社区用户转载文章,如果您发现本社区中有涉嫌抄袭的内容,欢迎发送邮件进行举报,并提供相关证据,一经查实,本社区将立刻删除涉嫌侵权内容,举报邮箱:
cloudbbs@huaweicloud.com
- 点赞
- 收藏
- 关注作者
评论(0)