-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathPageIndexMain.java
More file actions
115 lines (98 loc) · 4.23 KB
/
PageIndexMain.java
File metadata and controls
115 lines (98 loc) · 4.23 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
package com.pageindex;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.pageindex.model.Config;
import com.pageindex.utils.ConfigLoader;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.concurrent.CompletableFuture;
/**
 * Command-line entry point for PageIndex.
 *
 * <p>Parses the command-line options, loads the configuration (from {@code --config} when given,
 * otherwise the default one), builds the index for the PDF named by {@code --pdf_path}, and writes
 * the result as pretty-printed JSON to {@code <output dir>/<pdf base name>_structure.json}.
 *
 * <p>Exit status: 0 on success or when only the usage text was requested; 1 on a usage error or
 * when processing fails.
 */
public class PageIndexMain {
    private static final Logger logger = LoggerFactory.getLogger(PageIndexMain.class);
    private static final ObjectMapper mapper = new ObjectMapper();

    /** Output directory used when {@code --output} is not supplied. */
    private static final String DEFAULT_OUTPUT_PATH = "./results";

    /** Suffix appended to the PDF's base name to form the result file name. */
    private static final String OUTPUT_SUFFIX = "_structure.json";

    /**
     * Runs the command-line tool.
     *
     * @param args command-line arguments; see {@link #printUsage()} for the supported options
     */
    public static void main(String[] args) {
        if (args.length == 0) {
            printUsage();
            return;
        }

        String pdfPath = null;
        String configPath = null;
        String outputPath = DEFAULT_OUTPUT_PATH;

        // Parse command-line arguments.
        for (int i = 0; i < args.length; i++) {
            String option = args[i];
            if ("--help".equals(option) || "-h".equals(option)) {
                printUsage();
                return;
            }
            if (!isValueOption(option)) {
                // Keep going (unknown arguments were always tolerated), but make typos visible.
                System.err.println("Warning: ignoring unrecognized argument: " + option);
                continue;
            }
            if (i + 1 >= args.length) {
                // An option at the very end has no value; refuse instead of silently
                // falling back to a default the user did not ask for.
                exitWithUsageError("Error: " + option + " requires a value");
                return;
            }
            String value = args[++i];
            switch (option) {
                case "--pdf_path":
                    pdfPath = value;
                    break;
                case "--config":
                    configPath = value;
                    break;
                default: // "--output" is the only remaining value option
                    outputPath = value;
                    break;
            }
        }

        if (pdfPath == null) {
            exitWithUsageError("Error: --pdf_path is required");
            return;
        }

        try {
            // Load the configuration.
            Config config = configPath != null
                    ? ConfigLoader.loadConfig(configPath)
                    : ConfigLoader.loadDefaultConfig();

            // Create the PageIndex instance.
            PageIndex pageIndex = new PageIndex(config);

            // Build the index and block until it is complete.
            System.out.println("Processing PDF: " + pdfPath);
            CompletableFuture<PageIndex.IndexResult> future = pageIndex.buildIndex(pdfPath);
            PageIndex.IndexResult result = future.join();

            // Save the result.
            File outputDir = new File(outputPath);
            if (!outputDir.isDirectory()) {
                // Throws with a descriptive message if the directory cannot be created or
                // the path already exists as a regular file.
                Files.createDirectories(outputDir.toPath());
            }
            File outputFile = new File(outputDir, outputFileNameFor(pdfPath));
            mapper.writerWithDefaultPrettyPrinter().writeValue(outputFile, result);
            System.out.println("Tree structure saved to: " + outputFile.getAbsolutePath());
        } catch (Exception e) {
            Throwable cause = unwrap(e);
            logger.error("Failed to process PDF", cause);
            System.err.println("Error: " + describe(cause));
            cause.printStackTrace();
            System.exit(1);
        }
    }

    /** Returns whether {@code option} is one of the options that takes a value. */
    private static boolean isValueOption(String option) {
        return "--pdf_path".equals(option)
                || "--config".equals(option)
                || "--output".equals(option);
    }

    /** Prints {@code message} to stderr, shows the usage text and exits with status 1. */
    private static void exitWithUsageError(String message) {
        System.err.println(message);
        printUsage();
        System.exit(1);
    }

    /**
     * Derives the result file name from the PDF path: the file's base name with a trailing
     * {@code .pdf} extension (matched case-insensitively) removed, followed by
     * {@code _structure.json}.
     */
    private static String outputFileNameFor(String pdfPath) {
        String pdfName = new File(pdfPath).getName();
        return pdfName.replaceAll("(?i)\\.pdf$", "") + OUTPUT_SUFFIX;
    }

    /**
     * Returns the underlying failure when {@code e} is the {@code CompletionException} wrapper
     * thrown by {@link CompletableFuture#join()}; otherwise returns {@code e} itself.
     */
    private static Throwable unwrap(Exception e) {
        if (e instanceof java.util.concurrent.CompletionException && e.getCause() != null) {
            return e.getCause();
        }
        return e;
    }

    /** Returns the throwable's message, or its {@code toString()} when it has no message. */
    private static String describe(Throwable t) {
        String message = t.getMessage();
        return message != null ? message : t.toString();
    }

    /** Prints the usage text to standard output. */
    private static void printUsage() {
        System.out.println("PageIndex Java - A vectorless, reasoning-based RAG system");
        System.out.println();
        System.out.println("Usage:");
        System.out.println(" java -jar pageindex-java.jar --pdf_path <path> [options]");
        System.out.println();
        System.out.println("Options:");
        System.out.println(" --pdf_path <path> Path to the PDF file (required)");
        System.out.println(" --config <path> Path to config.yaml (optional, defaults to classpath)");
        System.out.println(" --output <path> Output directory (default: ./results)");
        System.out.println(" --help, -h Show this help message");
        System.out.println();
        System.out.println("Example:");
        System.out.println(" java -jar pageindex-java.jar --pdf_path document.pdf");
        System.out.println(" java -jar pageindex-java.jar --pdf_path document.pdf --config custom-config.yaml --output ./output");
    }
}