1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 | import java.io.BufferedReader; import java.io.IOException; import java.io.InputStreamReader; import java.nio.file.FileVisitOption; import java.nio.file.Files; import java.nio.file.Path; import java.nio.file.Paths; import java.nio.file.StandardOpenOption; import java.util.Arrays; import java.util.Enumeration; import java.util.regex.Matcher; import java.util.regex.Pattern; import java.util.stream.Collectors; import java.util.stream.Stream; import java.util.stream.Stream.Builder; import java.util.zip.ZipEntry; import java.util.zip.ZipFile; import com.jaunt.NotFound; import com.jaunt.ResponseException; public class Toast { public static void main(String[] args) throws ResponseException, NotFound, IOException { Path root = Paths.get(""); Stream<Path> strm1 = Files.find(root.resolve("pens"), 5, (f,ab) -> ab.isRegularFile() && f.getFileName().toString().endsWith(".zip"), new FileVisitOption[0]); Stream<Path> strm2 = Files.find(root.resolve("pens_categorised"), 5, (f,ab) -> ab.isRegularFile() && f.getFileName().toString().endsWith(".zip"), new FileVisitOption[0]); int[] a = {0,0}; String list = Stream.concat(strm1, strm2).flatMap(file -> { try { ZipFile zipfile = new ZipFile(file.toFile()); Enumeration<? 
extends ZipEntry> entries = zipfile.entries(); final Pattern pattern = Pattern.compile("['\"](http?.+?)['\"]"); while(entries.hasMoreElements()) { final ZipEntry entry = entries.nextElement(); if(!entry.isDirectory() && entry.getName().matches("(?i).+\\.(?:css|js|html)")) { a[0]++; InputStreamReader isr = new InputStreamReader(zipfile.getInputStream(entry), "utf-8"); BufferedReader reader = new BufferedReader(isr); return reader.lines() .flatMap(s1 -> { Matcher m = pattern.matcher(s1); Builder<String> b = Stream.builder(); while(m.find()) b.accept(m.group(1)); return b.build(); }).onClose(() -> { try { a[1]++; isr.close(); reader.close(); zipfile.close(); } catch (IOException e1) {} }); } } } catch (IOException e) { System.out.println(file+" "+e); } return Stream.empty(); }) .distinct() .collect(Collectors.joining("\n")); Files.write(Paths.get("a.txt"), list.getBytes(), StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING); System.out.println("DONE"+Arrays.toString(a)); } } |
Friday, 22 September 2017
java - RANDOM_CODE_2
This code extracts strings starting with http from files stored in zip files.
Subscribe to:
Post Comments (Atom)
No comments:
Post a Comment