정규 표현식 Pattern 객체의 생성 비용을 줄이자
객체 생성 비용이 비싼 객체
Effective Java item 6 - "불필요한 객체 생성을 피하라"라는 내용을 회사 코드에 적용한 내용을 정리하여 공유합니다.
- 스터디 정리 내용
책에서는 아래와 같이 설명하고 있습니다.
- 객체 생성 비용이 비싼 객체는 반복해서 생성하기보다, 캐싱하여 객체를 재사용하는 것이 좋다.
- 객체 생성 비용이 비싼 객체 = cpu 리소스 사용 多
- 정규표현식의 경우 객체 생성 시 생성 비용이 비싸다.
- 객체 생성 비용이 비싼 객체 = cpu 리소스 사용 多
정규 표현식을 통해 문자열을 검사할 때 내부 메서드를 들여다보면 정규표현식을 파라미터로 받아 내부적으로 Pattern 인스턴스로 컴파일하는 것을 확인할 수 있다.
코드 개선하기
회사에서 크롤링한 텍스트 데이터에서 필요한 항목만 추출하기 위해 기존에는 아래와 같이 메서드를 호출할 때마다 Pattern 객체를 컴파일하여 비효율적인 객체 생성을 하고 있었습니다.
/**
 * Runs e-mail and telephone-number extraction over the records returned by
 * {@link RegexRepository} and prints every match plus the total elapsed time.
 *
 * <p>Baseline variant: each extraction call compiles its own {@link Pattern}, so the
 * compilation cost is paid again for every record. The surrounding write-up uses this
 * listing as the "before" measurement, which is why the compile is left inside the methods.
 */
@Service
public class RegexService {

    private final RegexRepository regexRepository;

    public RegexService(RegexRepository regexRepository) {
        this.regexRepository = regexRepository;
    }

    /**
     * Loads every record as one combined string, runs both extractions on each of them
     * and prints the total elapsed time in milliseconds.
     */
    public void startTest() {
        StopWatch stopWatch = new StopWatch();
        stopWatch.start();
        for (String text : getCombinedText()) {
            extractEmail(text);
            extractTel(text);
        }
        stopWatch.stop();
        System.out.println("total Time = " + stopWatch.getTotalTimeMillis());
    }

    /**
     * Prints the first e-mail address found in the given text, if any.
     *
     * @param s text to search
     */
    private void extractEmail(String s) {
        // The hyphen is placed last in each character class so that it is a literal '-'.
        // Written as "+-\\_" it formed a range from '+' to '_' which also accepted
        // characters such as '@', ':', '<', '/' and ',' in the local part.
        String regex = "[a-zA-Z0-9+_.-]+@[a-zA-Z0-9-]+\\.[a-zA-Z0-9.-]+";
        // Compiled on every call (baseline behaviour being measured).
        Pattern pattern = Pattern.compile(regex, Pattern.MULTILINE);
        Matcher matcher = pattern.matcher(s);
        if (matcher.find()) {
            System.out.println("Full match Email : " + matcher.group(0));
        }
    }

    /**
     * Prints the first telephone number found at the end of a line of the given text, if any.
     *
     * @param s text to search
     */
    private void extractTel(String s) {
        String regex = "\\d{2,3}-\\d{3,4}-\\d{4}$";
        // MULTILINE lets '$' match at the end of every line, not only at the end of the input.
        // Compiled on every call (baseline behaviour being measured).
        Pattern pattern = Pattern.compile(regex, Pattern.MULTILINE);
        Matcher matcher = pattern.matcher(s);
        if (matcher.find()) {
            System.out.println("Full match Tel : " + matcher.group(0));
        }
    }

    /**
     * Merges the columns of every stored row into a single string per row.
     *
     * @return one combined string per row, in the order returned by the repository
     */
    private List<String> getCombinedText() {
        List<SeoulBidData> rows = regexRepository.findAll();
        List<String> combined = new ArrayList<>(rows.size());
        for (SeoulBidData data : rows) {
            combined.add(joinWithTrailingSpaces(
                    data.getPostId(),
                    data.getPostName(),
                    data.getCreateDate(),
                    data.getUpdateDate(),
                    data.getOrgan(),
                    data.getPosition(),
                    data.getTel(),
                    data.getEmail(),
                    data.getTagWord(),
                    data.getContents(),
                    data.getStartDate(),
                    data.getEndDate(),
                    data.getDateCheck(),
                    data.getFileName(),
                    data.getFileCount()));
        }
        return combined;
    }

    /** Appends the string form of every field followed by one space ({@code null} becomes "null"). */
    private static String joinWithTrailingSpaces(Object... fields) {
        StringBuilder sb = new StringBuilder();
        for (Object field : fields) {
            sb.append(field).append(' ');
        }
        return sb.toString();
    }
}
Effective Java에서 소개하는 것처럼 객체 생성 비용이 큰 객체일 경우 캐싱하여 객체를 재사용하기 위해 Pattern 객체를 private static final로 상수화하여 객체 생성 비용을 감소시킬 수 있습니다.
package com.example.regextest;
import org.springframework.stereotype.Service;
import org.springframework.util.StopWatch;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Runs e-mail and telephone-number extraction over the records returned by
 * {@link RegexRepository} and prints every match plus the total elapsed time.
 *
 * <p>Both regular expressions are compiled once into {@code static final} constants and
 * reused for every record; {@link Pattern} instances are immutable and safe to share.
 */
@Service
public class RegexService {

    // The hyphen is placed last in each character class so that it is a literal '-'.
    // Written as "+-\\_" it formed a range from '+' to '_' which also accepted characters
    // such as '@', ':', '<', '/' and ','. The pattern must not be wrapped in escaped double
    // quotes: with them it only matched addresses surrounded by literal '"' characters.
    private static final Pattern EMAIL_PATTERN = Pattern.compile(
            "[a-zA-Z0-9+_.-]+@[a-zA-Z0-9-]+\\.[a-zA-Z0-9.-]+", Pattern.MULTILINE);

    // MULTILINE lets '$' match at the end of every line, not only at the end of the input.
    private static final Pattern TEL_PATTERN = Pattern.compile(
            "\\d{2,3}-\\d{3,4}-\\d{4}$", Pattern.MULTILINE);

    private final RegexRepository regexRepository;

    public RegexService(RegexRepository regexRepository) {
        this.regexRepository = regexRepository;
    }

    /**
     * Loads every record as one combined string, runs both extractions on each of them
     * and prints the total elapsed time in milliseconds.
     */
    public void startTest() {
        StopWatch stopWatch = new StopWatch();
        stopWatch.start();
        for (String text : getCombinedText()) {
            extractEmail(text);
            extractTel(text);
        }
        stopWatch.stop();
        System.out.println("total Time = " + stopWatch.getTotalTimeMillis());
    }

    /**
     * Prints the first e-mail address found in the given text, if any.
     *
     * @param s text to search
     */
    private void extractEmail(String s) {
        Matcher matcher = EMAIL_PATTERN.matcher(s);
        if (matcher.find()) {
            System.out.println("Full match Email : " + matcher.group(0));
        }
    }

    /**
     * Prints the first telephone number found at the end of a line of the given text, if any.
     *
     * @param s text to search
     */
    private void extractTel(String s) {
        Matcher matcher = TEL_PATTERN.matcher(s);
        if (matcher.find()) {
            System.out.println("Full match Tel : " + matcher.group(0));
        }
    }

    /**
     * Merges the columns of every stored row into a single string per row.
     *
     * @return one combined string per row, in the order returned by the repository
     */
    private List<String> getCombinedText() {
        List<SeoulBidData> rows = regexRepository.findAll();
        List<String> combined = new ArrayList<>(rows.size());
        for (SeoulBidData data : rows) {
            combined.add(joinWithTrailingSpaces(
                    data.getPostId(),
                    data.getPostName(),
                    data.getCreateDate(),
                    data.getUpdateDate(),
                    data.getOrgan(),
                    data.getPosition(),
                    data.getTel(),
                    data.getEmail(),
                    data.getTagWord(),
                    data.getContents(),
                    data.getStartDate(),
                    data.getEndDate(),
                    data.getDateCheck(),
                    data.getFileName(),
                    data.getFileCount()));
        }
        return combined;
    }

    /** Appends the string form of every field followed by one space ({@code null} becomes "null"). */
    private static String joinWithTrailingSpaces(Object... fields) {
        StringBuilder sb = new StringBuilder();
        for (Object field : fields) {
            sb.append(field).append(' ');
        }
        return sb.toString();
    }
}
예제 데이터는 공공데이터 포털에서 다운로드하여 테스트했습니다. (https://www.data.go.kr/data/15072302/fileData.do)
총 36819건 데이터로 테스트한 결과 아래와 같이 12.311초 속도가 개선된 것을 확인할 수 있습니다.

