Friday, 22 September 2017

java - RANDOM_CODE_2

This code extracts quoted strings starting with http from the .css, .js and .html files stored inside zip archives.

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.nio.file.FileVisitOption;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardOpenOption;
import java.util.Arrays;
import java.util.Enumeration;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import java.util.stream.Stream.Builder;
import java.util.zip.ZipEntry;
import java.util.zip.ZipFile;

import com.jaunt.NotFound;
import com.jaunt.ResponseException;

/**
 * Scans the zip archives found under the {@code pens} and {@code pens_categorised}
 * directories (relative to the working directory) and writes every distinct quoted
 * string that starts with {@code http} to {@code a.txt}, one per line.
 */
public class Toast {
 /** Matches a single- or double-quoted string starting with "http"; group 1 is the text between the quotes. */
 private static final Pattern QUOTED_HTTP = Pattern.compile("['\"](http.+?)['\"]");

 /** Matches the names of archive entries that get scanned (extension check is case-insensitive). */
 private static final Pattern SCANNED_ENTRY = Pattern.compile("(?i).+\\.(?:css|js|html)");

 /** Maximum directory depth searched for zip archives. */
 private static final int MAX_DEPTH = 5;

 /**
  * Collects the links from all archives, writes them to {@code a.txt} and prints the
  * scan counters.
  *
  * @param args ignored
  * @throws IOException if a search directory cannot be traversed or {@code a.txt} cannot be written
  * @throws ResponseException never thrown by this body; kept so the signature is unchanged
  * @throws NotFound never thrown by this body; kept so the signature is unchanged
  */
 public static void main(String[] args) throws ResponseException, NotFound, IOException {
  Path root = Paths.get("");

  // counts[0] = entries opened for scanning, counts[1] = entries read through to the end.
  int[] counts = {0, 0};

  String list = collectLinks(root, counts);

  // The entries are decoded as UTF-8, so write the result with the same charset
  // instead of the platform default.
  Files.write(Paths.get("a.txt"), list.getBytes(StandardCharsets.UTF_8),
    StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING);
  System.out.println("DONE"+Arrays.toString(counts));
 }

 /** Joins the distinct links of every archive below both search directories with newlines. */
 private static String collectLinks(Path root, int[] counts) throws IOException {
  // Files.find keeps directory handles open until the stream is closed.
  try (Stream<Path> pens = findZips(root.resolve("pens"));
    Stream<Path> categorised = findZips(root.resolve("pens_categorised"))) {
   return Stream.concat(pens, categorised)
     .flatMap(zip -> linksIn(zip, counts))
     .distinct()
     .collect(Collectors.joining("\n"));
  }
 }

 /** Lists the regular files named {@code *.zip} up to {@link #MAX_DEPTH} levels below {@code dir}. */
 private static Stream<Path> findZips(Path dir) throws IOException {
  return Files.find(dir, MAX_DEPTH,
    (file, attrs) -> attrs.isRegularFile() && file.getFileName().toString().endsWith(".zip"));
 }

 /**
  * Returns the links found in every css/js/html entry of one archive.
  *
  * <p>The archive is read eagerly and closed before returning. If it cannot be read, the
  * failure is reported on standard output and the links gathered so far are returned, so
  * one bad archive does not abort the whole run.
  */
 private static Stream<String> linksIn(Path zip, int[] counts) {
  Builder<String> links = Stream.builder();
  try (ZipFile zipFile = new ZipFile(zip.toFile())) {
   Enumeration<? extends ZipEntry> entries = zipFile.entries();
   while (entries.hasMoreElements()) {
    ZipEntry entry = entries.nextElement();
    if (entry.isDirectory() || !SCANNED_ENTRY.matcher(entry.getName()).matches()) {
     continue;
    }
    counts[0]++;
    try (BufferedReader reader = new BufferedReader(
      new InputStreamReader(zipFile.getInputStream(entry), StandardCharsets.UTF_8))) {
     appendLinks(reader, links);
    }
    counts[1]++;
   }
  } catch (IOException e) {
   System.out.println(zip+"  "+e);
  }
  return links.build();
 }

 /** Adds every quoted http string found on any line of {@code reader} to {@code links}. */
 private static void appendLinks(BufferedReader reader, Builder<String> links) throws IOException {
  String line;
  while ((line = reader.readLine()) != null) {
   Matcher matcher = QUOTED_HTTP.matcher(line);
   while (matcher.find()) {
    links.accept(matcher.group(1));
   }
  }
 }
}

No comments:

Post a Comment