You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

utils.py 2.5KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687
  1. import concurrent.futures
  2. import concurrent.futures
  3. import time
  4. from urllib.error import HTTPError
  5. from urllib.request import urlretrieve
  6. from torch.utils.data import IterableDataset
  7. from tqdm import tqdm
  8. from config import Config
  9. def find_in_log_n(start, end, func, bias=0.3):
  10. if end - start <= 1:
  11. return start
  12. mid = int(start * (1 - bias) + end * bias)
  13. if start == mid:
  14. mid += 1
  15. if func(mid):
  16. return find_in_log_n(mid, end, func)
  17. else:
  18. return find_in_log_n(start, mid, func)
  19. def fetch_tile_content(tile_url, retry=15):
  20. for i in range(retry):
  21. try:
  22. image_path = urlretrieve(tile_url)[0]
  23. with open(image_path, "rb") as file:
  24. return file.read()
  25. except Exception as e:
  26. print("e", end="|")
  27. time.sleep(2 ** (0.3 * (i + 1)))
  28. if i == retry - 1:
  29. if input("continue") == "y":
  30. return fetch_tile_content(tile_url, retry)
  31. raise e
  32. raise HTTPError("Not able for fetch image tile", code=500, msg="", hdrs={}, fp=None)
  33. def download_urls_in_thread(url_and_index_list):
  34. def download(args):
  35. url, index = args
  36. file_content = fetch_tile_content(url)
  37. return file_content, index
  38. with concurrent.futures.ThreadPoolExecutor(max_workers=Config.workers) as executor:
  39. for tile, i in tqdm(executor.map(download, url_and_index_list), total=len(url_and_index_list)):
  40. yield tile, i
  41. def _get_alignment_sore_and_percent(seq1, seq2, match_score=2, mismatch_score=-1, gap_score=-1):
  42. from alignment.sequence import Sequence
  43. from alignment.sequencealigner import SimpleScoring, GlobalSequenceAligner
  44. from alignment.vocabulary import Vocabulary
  45. a = Sequence(seq1)
  46. b = Sequence(seq2)
  47. v = Vocabulary()
  48. aEncoded = v.encodeSequence(a)
  49. bEncoded = v.encodeSequence(b)
  50. scoring = SimpleScoring(match_score, mismatch_score)
  51. aligner = GlobalSequenceAligner(scoring, gap_score)
  52. score = aligner.align(aEncoded, bEncoded, backtrace=False)
  53. return score
  54. def get_normalized_score(seq1, seq2):
  55. score = _get_alignment_sore_and_percent(seq1, seq2)
  56. return score / (len(seq2) + len(seq1))
  57. class DatasetWithGenerator(IterableDataset):
  58. def __init__(self, generator):
  59. self.generator = generator
  60. def __iter__(self):
  61. return self.generator
  62. if __name__ == '__main__':
  63. import math
  64. print(math.log2(1000 * 1000))
  65. print(find_in_log_n(0, 100, lambda x: x <= 76))