vqa_re_convert.py 2.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051
  1. # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. import numpy as np
  15. class TensorizeEntitiesRelations(object):
  16. def __init__(self, max_seq_len=512, infer_mode=False, **kwargs):
  17. self.max_seq_len = max_seq_len
  18. self.infer_mode = infer_mode
  19. def __call__(self, data):
  20. entities = data['entities']
  21. relations = data['relations']
  22. entities_new = np.full(
  23. shape=[self.max_seq_len + 1, 3], fill_value=-1, dtype='int64')
  24. entities_new[0, 0] = len(entities['start'])
  25. entities_new[0, 1] = len(entities['end'])
  26. entities_new[0, 2] = len(entities['label'])
  27. entities_new[1:len(entities['start']) + 1, 0] = np.array(entities[
  28. 'start'])
  29. entities_new[1:len(entities['end']) + 1, 1] = np.array(entities['end'])
  30. entities_new[1:len(entities['label']) + 1, 2] = np.array(entities[
  31. 'label'])
  32. relations_new = np.full(
  33. shape=[self.max_seq_len * self.max_seq_len + 1, 2],
  34. fill_value=-1,
  35. dtype='int64')
  36. relations_new[0, 0] = len(relations['head'])
  37. relations_new[0, 1] = len(relations['tail'])
  38. relations_new[1:len(relations['head']) + 1, 0] = np.array(relations[
  39. 'head'])
  40. relations_new[1:len(relations['tail']) + 1, 1] = np.array(relations[
  41. 'tail'])
  42. data['entities'] = entities_new
  43. data['relations'] = relations_new
  44. return data