Initial commit
This commit is contained in:
7
.editorconfig
Normal file
7
.editorconfig
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
root = true
|
||||||
|
|
||||||
|
[*]
|
||||||
|
end_of_line = lf
|
||||||
|
indent_style = tab
|
||||||
|
insert_final_newline = true
|
||||||
|
trim_trailing_whitespace = true
|
||||||
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
@@ -0,0 +1 @@
|
|||||||
|
/target
|
||||||
42
Cargo.lock
generated
Normal file
42
Cargo.lock
generated
Normal file
@@ -0,0 +1,42 @@
|
|||||||
|
# This file is automatically @generated by Cargo.
|
||||||
|
# It is not intended for manual editing.
|
||||||
|
version = 3
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "aho-corasick"
|
||||||
|
version = "0.7.18"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "1e37cfd5e7657ada45f742d6e99ca5788580b5c529dc78faf11ece6dc702656f"
|
||||||
|
dependencies = [
|
||||||
|
"memchr",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "memchr"
|
||||||
|
version = "2.4.1"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a"
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "pdfextractor"
|
||||||
|
version = "0.1.0"
|
||||||
|
dependencies = [
|
||||||
|
"regex",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "regex"
|
||||||
|
version = "1.5.5"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "1a11647b6b25ff05a515cb92c365cec08801e83423a235b51e231e1808747286"
|
||||||
|
dependencies = [
|
||||||
|
"aho-corasick",
|
||||||
|
"memchr",
|
||||||
|
"regex-syntax",
|
||||||
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "regex-syntax"
|
||||||
|
version = "0.6.25"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b"
|
||||||
9
Cargo.toml
Normal file
9
Cargo.toml
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
[package]
|
||||||
|
name = "pdfextractor"
|
||||||
|
version = "0.1.0"
|
||||||
|
edition = "2021"
|
||||||
|
|
||||||
|
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
regex = "*"
|
||||||
1
rustfmt.toml
Normal file
1
rustfmt.toml
Normal file
@@ -0,0 +1 @@
|
|||||||
|
hard_tabs = true
|
||||||
60
src/main.rs
Normal file
60
src/main.rs
Normal file
@@ -0,0 +1,60 @@
|
|||||||
|
use std::{
|
||||||
|
env::args,
|
||||||
|
fs::File,
|
||||||
|
io::{Read, Write},
|
||||||
|
path::PathBuf,
|
||||||
|
};
|
||||||
|
|
||||||
|
use regex::bytes::Regex;
|
||||||
|
|
||||||
|
fn main() {
|
||||||
|
let path = args().nth(1).expect("no file given");
|
||||||
|
let mut data = Vec::new();
|
||||||
|
File::open(path)
|
||||||
|
.expect("Cannot open file")
|
||||||
|
.read_to_end(&mut data)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let re_obj_begin = Regex::new(r"\d+ \d+ obj\n").unwrap();
|
||||||
|
let re_line = Regex::new(r"[^\n]*\n").unwrap();
|
||||||
|
let re_len = regex::Regex::new(r"/Length (\d+)").unwrap();
|
||||||
|
|
||||||
|
let mut data = &data[..];
|
||||||
|
let mut filecount: usize = 0;
|
||||||
|
loop {
|
||||||
|
let m = match re_obj_begin.find(&data) {
|
||||||
|
Some(m) => m,
|
||||||
|
None => break,
|
||||||
|
};
|
||||||
|
data = &data[m.end()..];
|
||||||
|
let m = re_line.find(&data).unwrap();
|
||||||
|
let line = String::from_utf8(m.as_bytes().to_owned()).unwrap();
|
||||||
|
data = &data[m.end()..];
|
||||||
|
if !line.contains("/Image") {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if !line.contains("/Type /XObject") {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
let length = re_len
|
||||||
|
.captures(&line)
|
||||||
|
.unwrap()
|
||||||
|
.get(1)
|
||||||
|
.unwrap()
|
||||||
|
.as_str()
|
||||||
|
.parse()
|
||||||
|
.unwrap();
|
||||||
|
data = &data[7..];
|
||||||
|
let stream = &data[..length];
|
||||||
|
data = &data[length..];
|
||||||
|
File::create(
|
||||||
|
["out".to_string(), filecount.to_string()]
|
||||||
|
.iter()
|
||||||
|
.collect::<PathBuf>(),
|
||||||
|
)
|
||||||
|
.unwrap()
|
||||||
|
.write_all(stream)
|
||||||
|
.unwrap();
|
||||||
|
filecount += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user