- Removed 'txt init'

Format
- New B32 ID Index
- New option: txt index --print
- Move scheme to peers
- Replace peer.*.conf files with index packed locations. Instead of adding a URL to peers.*.conf, run `txt pull <url>`

Conversion
- Rewritten converters
- txt-convert looks for a .convert.conf containing `key: value` lines (example below)
- Specifiable topic-roots from .convert.conf
- Added `Topics:` key, with comma-separated topics. If set, only those topics appear in the main index and are used as topic roots. Other topics still get sub-indices generated, but aren't listed in the main index
- HTML converter header & footer options
- HTML-index renamed to HTM-index

Internal
- Change types: uuid:Uuid -> id:string
- File_store merges identical texts
- Use peer ID for store path, store peers' texts in .local/share/texts
- Simple URN resolution for converter
- Continue to next feed if parsing one fails
- Phasing out Archive, replaced by improved packs
- Eliminate Bos, Cohttp, lwt, uri, tls, Re, Ptime dependencies
- Lock version for Cmdliner, fix dune-project
- Optional recursive store
- Improve header_pack
- Fix recursive mkdir

git-svn-id: file:///srv/svn/repo/kosuzu/trunk@3 eb64cd80-c68d-6f47-b6a3-0ada418499da
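For illustration, a minimal .convert.conf (read from the conversion target directory, as the diff below shows) might contain `key: value` lines such as these; the keys are ones referenced in this commit, and the values are placeholders rather than a definitive reference:

  Title: Example texts
  Topics: software, networks
  HTTP-URL: https://example.net/texts
  GEMINI-URL: gemini://example.net/texts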
cli/convert.ml
@@ -1,90 +1,67 @@
open Logarion
module A = Archive.Make (Logarion.File_store)

let convert_modified source dest fn title text =
  if (try Unix.((stat dest).st_mtime < (stat source).st_mtime) with _-> true)
  then (File_store.file dest (fn title text); true) else false
let is_older source dest = try
  Unix.((stat dest).st_mtime < (stat source).st_mtime) with _-> true

let word_fname dir text = dir ^ "/" ^ Text.alias text
let id_fname dir text = dir ^ "/" ^ Text.short_id text
let convert cs r (text, files) = match Text.str "Content-Type" text with
  | "" | "text/plain" ->
    let source = List.hd files in
    let dest = Filename.concat r.Conversion.dir (Text.short_id text) in
    List.fold_left
      (fun a f ->
        let dest = dest ^ f.Conversion.ext in
        if is_older source dest then (File_store.file dest (f.Conversion.page r text); true) else false
        || a)
      false cs
  | x -> Printf.eprintf "Can't convert Content-Type: %s file: %s" x text.Text.title; false

let writer types dir name (text,store_item) = (* todo: single_parser -> [files] *)
  (* convert_modified store_item idfilename (fun _title -> Text.to_string) text.title text;*)
  let h = if "htm" = types || "all" = types then
      convert_modified store_item (id_fname dir text ^ ".htm") Html.page name text
    else false in
  let g = if "gmi" = types || "all" = types then
      convert_modified store_item (id_fname dir text ^ ".gmi") Gemini.page name text
    else false in
  h || g
let converters types kv =
  let t = [] in
  let t = if ("htm" = types || "all" = types) then
      (let htm = Html.init kv in
       Conversion.{ ext = Html.ext; page = Html.page htm; indices = Html.indices htm })::t
    else t in
  let t = if ("gmi" = types || "all" = types) then
      Conversion.{ ext = Gemini.ext; page = Gemini.page; indices = Gemini.indices}::t else t in
  t

let index_writer types noindex dir archive topic_roots topic_map texts =
  let name = archive.A.name in
  let file path = File_store.file (dir ^ path) in
  file "/index.pck" (Header_pack.pack archive texts);
  if not noindex && ("htm" = types || "all" = types) then (
    let index_name = try Store.KV.find "HTML-index" archive.File_store.kv
      with Not_found -> "index.html" in
    if index_name <> "" then
      file ("/"^index_name) (Html.topic_main_index name topic_roots texts);
    file "/index.date.htm" (Html.date_index name texts);
    List.iter
      (fun topic -> file ("/index." ^ topic ^ ".htm")
        (Html.topic_sub_index name topic_map topic texts))
      topic_roots;
    let base_url = try Store.KV.find "HTTP-URL" archive.File_store.kv
      with Not_found -> prerr_endline "Missing `HTTP-URL:` in config"; "" in
    file "/feed.atom" (Atom.feed archive.A.name archive.A.id base_url "text/html" texts)
  );
  if not noindex && ("gmi" = types || "all" = types) then (
    let index_name = try Store.KV.find "Gemini-index" archive.File_store.kv
      with Not_found -> "index.gmi" in
    if index_name <> "" then
      file ("/"^index_name) (Gemini.topic_main_index name topic_roots texts);
    file "/index.date.gmi" (Gemini.date_index name texts);
    List.iter
      (fun topic -> file ("/index." ^ topic ^ ".gmi")
        (Gemini.topic_sub_index name topic_map topic texts))
      topic_roots;
    let base_url = try Store.KV.find "GEMINI-URL" archive.File_store.kv
      with Not_found -> prerr_endline "Missing `GEMINI-URL:` in config"; "" in
    file "/gmi.atom" (Atom.feed archive.A.name archive.A.id base_url "text/gemini" texts)
  )
let txt_writer types dir name ((text, _store_item) as r) =
  match Text.str "Content-Type" text with
  | "" | "text/plain" -> writer types dir name r
  | x -> prerr_endline ("Can't convert Content-Type: "^x^" file: " ^text.Text.title); false

let convert_all types noindex dir archive =
  let name = archive.A.name in
  let fn (ts,ls,acc) ((elt,_) as r) =
    (Topic_set.to_map ts (Text.set "topics" elt)),
    elt::ls, if txt_writer types dir name r then acc+1 else acc in
let convert_all converters noindex dir id kv =
  let empty = Topic_set.Map.empty in
  let topic_map, texts, count = A.(fold ~order:newest fn (empty,[],0) archive) in
  let topic_roots = Topic_set.roots topic_map in
  index_writer types noindex dir archive topic_roots topic_map texts;
  print_endline @@ "Converted: " ^ string_of_int (count)
    ^ "\nIndexed: " ^ string_of_int (List.length texts);
  Ok ()
  let repo = Conversion.{ id; dir; kv; topic_roots = []; topics = empty; texts = [] } in
  let fn (ts,ls,acc) ((elt,_) as r) =
    (Topic_set.to_map ts (Text.set "topics" elt)), elt::ls,
    if convert converters repo r then acc+1 else acc in
  let topics, texts, count = File_store.(fold ~dir ~order:newest fn (empty,[],0)) in
  let topic_roots = try List.rev @@ String_set.list_of_csv (Store.KV.find "Topics" kv)
    with Not_found -> Topic_set.roots topics in
  let repo = Conversion.{ repo with topic_roots; topics; texts } in
  if not noindex then List.iter (fun c -> c.Conversion.indices repo) converters;
  Printf.printf "Converted: %d Indexed: %d\n" count (List.length texts)
let convert_dir types noindex cmd_dir =
  let (>>=) = Result.bind in
  let with_dir dir =
    Result.map_error (function `Msg m -> m)
      Logarion.File_store.Directory.(directory dir |> print ~descr:"export" dir) in
  (A.of_path "."
   >>= fun archive -> (match cmd_dir with "" -> Error "unspecified export dir" | x -> Ok x)
   >>= fun dir -> with_dir dir
   >>= fun _ -> convert_all types noindex dir { archive with store = dir })
  |> function Ok () -> () | Error x -> prerr_endline x
let convert_dir types noindex dir =
  match dir with "" -> prerr_endline "unspecified dir"
  | dir ->
    let fname = Filename.concat dir "index.pck" in
    match Header_pack.of_string @@ File_store.to_string fname with
    | Error s -> prerr_endline s
    | Ok { info; _ } ->
      let kv = let f = Filename.concat dir ".convert.conf" in (* TODO: better place to store convert conf? *)
        if Sys.file_exists f then File_store.of_kv_file f else Store.KV.empty in
      let kv = if Store.KV.mem "Title" kv then kv
        else Store.KV.add "Title" info.Header_pack.title kv in
      let kv = Store.KV.add "Locations" (String.concat ";\n" info.Header_pack.locations) kv in
      let cs = converters types kv in
      convert_all cs noindex dir info.Header_pack.id kv
open Cmdliner

let term =
  let directory = Arg.(value & pos 0 string "" & info [] ~docv:"target directory" ~doc:"Directory to convert into") in
  let types = Arg.(value & opt string "all" & info ["t"; "type"] ~docv:"TYPES" ~doc:"Convert to type") in
  let noindex = Arg.(value & flag & info ["noindex"] ~doc:"don't write an index when converting") in
  let directory = Arg.(value & pos 0 string "" & info [] ~docv:"target directory"
    ~doc:"Directory to convert") in
  let types = Arg.(value & opt string "all" & info ["t"; "type"] ~docv:"TYPES"
    ~doc:"Convert to type") in
  let noindex = Arg.(value & flag & info ["noindex"]
    ~doc:"don't create indices in target format") in
  Term.(const convert_dir $ types $ noindex $ directory),
  Term.info "convert" ~doc:"convert archive" ~man:[ `S "DESCRIPTION"; `P "Convert Logarion archive" ]
  Term.info "convert" ~doc:"convert txts"
    ~man:[ `S "DESCRIPTION"; `P "Convert texts within a directory to another format.
      Directory must contain an index.pck. Run `txt index` first." ]
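As a usage sketch, assuming the convert term above is invoked as `txt convert` (the commit message also writes it as txt-convert) and with illustrative paths:

  txt index                                # produce index.pck first, as the man text requires
  txt convert -t htm ./public              # convert texts to HTML pages and indices
  txt convert -t gmi --noindex ./capsule   # gemtext pages only, skip index files

The target directory must already contain an index.pck, which is why `txt index` runs first; -t accepts "htm", "gmi", or the default "all" per the converters function in this diff.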